From b8be80cb8b275ce440cfc960dfa0fb13e3192f90 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 22 Mar 2025 00:48:41 +0100 Subject: [PATCH 01/45] transformer overhaul --- hcl2/api.py | 2 +- hcl2/{transformer.py => dict_transformer.py} | 4 + hcl2/rule_transformer.py | 101 ++++ hcl2/serialization.py | 496 +++++++++++++++++++ test/helpers/hcl2_helper.py | 2 +- test/unit/test_dict_transformer.py | 2 +- 6 files changed, 604 insertions(+), 3 deletions(-) rename hcl2/{transformer.py => dict_transformer.py} (99%) create mode 100644 hcl2/rule_transformer.py create mode 100644 hcl2/serialization.py diff --git a/hcl2/api.py b/hcl2/api.py index 399ba929..1cec02a2 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -3,7 +3,7 @@ from lark.tree import Tree from hcl2.parser import parser, reconstruction_parser -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer from hcl2.reconstructor import HCLReconstructor, HCLReverseTransformer diff --git a/hcl2/transformer.py b/hcl2/dict_transformer.py similarity index 99% rename from hcl2/transformer.py rename to hcl2/dict_transformer.py index 382092d6..64c58bcb 100644 --- a/hcl2/transformer.py +++ b/hcl2/dict_transformer.py @@ -277,6 +277,10 @@ def heredoc_template_trim(self, args: List) -> str: def new_line_or_comment(self, args: List) -> _DiscardType: return Discard + # def EQ(self, args: List): + # print("EQ", args) + # return args + def for_tuple_expr(self, args: List) -> str: args = self.strip_new_line_tokens(args) for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) diff --git a/hcl2/rule_transformer.py b/hcl2/rule_transformer.py new file mode 100644 index 00000000..8f0b922a --- /dev/null +++ b/hcl2/rule_transformer.py @@ -0,0 +1,101 @@ +# pylint: disable=missing-function-docstring,unused-argument +from typing import List, Union + +from lark import Transformer, Tree, Token +from lark.visitors import _Leaf_T, _Return_T, Discard + +from hcl2.serialization import ( + LarkRule, + 
LarkToken, + StartRule, + BodyRule, + BlockRule, + IdentifierRule, + IntLitRule, + FloatLitRule, + StringLitRule, + ExprTermRule, + ConditionalRule, + BinaryOpRule, + BinaryOperatorRule, + BinaryTermRule, + UnaryOpRule, + AttributeRule, + NewLineOrCommentRule, +) + +ArgsType = List[Union[Token, Tree]] + + +class RuleTransformer(Transformer): + """Takes a syntax tree generated by the parser and + transforms it to a tree of LarkRule instances + """ + + with_meta: bool + + @staticmethod + def is_type_keyword(value: str) -> bool: + return value in {"bool", "number", "string"} + + def __init__(self, with_meta: bool = False, with_comments: bool = True): + """ + :param with_meta: If set to true then adds `__start_line__` and `__end_line__` + parameters to the output dict. Default to false. + """ + self._with_meta = with_meta + self._with_comments = with_comments + super().__init__() + + def start(self, args: ArgsType) -> StartRule: + return StartRule(args) + + def body(self, args: ArgsType) -> BodyRule: + return BodyRule(args) + + def block(self, args: ArgsType) -> BlockRule: + return BlockRule(args) + + def identifier(self, args: ArgsType) -> IdentifierRule: + return IdentifierRule(args) + + def int_lit(self, args: ArgsType) -> IntLitRule: + return IntLitRule(args) + + def float_lit(self, args: ArgsType) -> FloatLitRule: + return FloatLitRule(args) + + def string_lit(self, args: ArgsType) -> StringLitRule: + return StringLitRule(args) + + def expr_term(self, args: ArgsType) -> ExprTermRule: + return ExprTermRule(args) + + def conditional(self, args: ArgsType) -> ConditionalRule: + return ConditionalRule(args) + + def binary_operator(self, args: ArgsType) -> BinaryOperatorRule: + return BinaryOperatorRule(args) + + def binary_term(self, args: ArgsType) -> BinaryTermRule: + return BinaryTermRule(args) + + def unary_op(self, args: ArgsType) -> UnaryOpRule: + return UnaryOpRule(args) + + def binary_op(self, args: ArgsType) -> BinaryOpRule: + return BinaryOpRule(args) + + def 
attribute(self, args: ArgsType) -> AttributeRule: + return AttributeRule(args) + + def new_line_or_comment(self, args: ArgsType) -> NewLineOrCommentRule: + if self._with_comments: + return NewLineOrCommentRule(args) + return Discard + + def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: + return super().transform(tree) + + def __default_token__(self, token: Token) -> LarkToken: + return LarkToken(token.type, token.value) diff --git a/hcl2/serialization.py b/hcl2/serialization.py new file mode 100644 index 00000000..15d75caa --- /dev/null +++ b/hcl2/serialization.py @@ -0,0 +1,496 @@ +from abc import ABC, abstractmethod +from json import JSONEncoder +from typing import List, Any, Union, Tuple, Optional + +from lark import Tree, Token + +ArgsType = List["LarkElement"] + + +def is_dollar_string(value: str) -> bool: + return value.startswith("${") and value.endswith("}") + + +def to_dollar_string(value: str) -> str: + if not is_dollar_string(value): + return f"${{{value}}}" + return value + + +def unwrap_dollar_string(value: str) -> str: + if is_dollar_string(value): + return value[2:-1] + return value + + +def wrap_into_parentheses(value: str) -> str: + if is_dollar_string(value): + value = unwrap_dollar_string(value) + return to_dollar_string(f"({value})") + + return f"({value})" + + +class LarkEncoder(JSONEncoder): + def default(self, obj: Any): + if isinstance(obj, LarkRule): + return obj.serialize() + else: + return super().default(obj) + + +class LarkElement(ABC): + @abstractmethod + def tree(self) -> Token: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + +class LarkToken(LarkElement): + def __init__(self, name: str, value: Union[str, int]): + self._name = name + self._value = value + + @property + def name(self) -> str: + return self._name + + @property + def value(self): + return self._value + + def serialize(self) -> Any: + return self._value + + def tree(self) -> Token: + return 
Token(self.name, self.value) + + def __str__(self) -> str: + return str(self._value) + + def __repr__(self) -> str: + return f"" + + +EQ_Token = LarkToken + + +class TokenSequence: + def __init__(self, tokens: List[LarkToken]): + self.tokens = tokens + + def tree(self) -> List[Token]: + return [token.tree() for token in self.tokens] + + def joined(self): + return "".join(str(token) for token in self.tokens) + + +class LarkRule(ABC): + _classes = [] + + @staticmethod + @abstractmethod + def rule_name() -> str: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + def tree(self) -> Tree: + result_children = [] + for child in self._children: + if child is None: + continue + + if isinstance(child, TokenSequence): + result_children.extend(child.tree()) + else: + result_children.append(child.tree()) + + return Tree(self.rule_name(), result_children) + + def __init__(self, children): + self._children: List[LarkElement] = children + + def __init_subclass__(cls, **kwargs): + cls._classes.append(cls) + + def __repr__(self): + return f"" + + +class StartRule(LarkRule): + + _children: Tuple["BodyRule"] + + @staticmethod + def rule_name() -> str: + return "start" + + @property + def body(self) -> "BodyRule": + return self._children[0] + + def serialize(self) -> Any: + return self.body.serialize() + + +class BodyRule(LarkRule): + + _children: List[ + Union[ + "NewLineOrCommentRule", + "AttributeRule", + "BlockRule", + ] + ] + + @staticmethod + def rule_name() -> str: + return "body" + + def serialize(self) -> Any: + blocks: List[BlockRule] = [] + attributes: List[AttributeRule] = [] + comments = [] + + for child in self._children: + if isinstance(child, BlockRule): + blocks.append(child) + if isinstance(child, AttributeRule): + attributes.append(child) + if isinstance(child, NewLineOrCommentRule): + child_comments = child.actual_comments() + if child_comments: + comments.extend(child_comments) + + result = {} + + for 
attribute in attributes: + result.update( + {attribute.identifier.serialize(): attribute.expression.serialize()} + ) + + result.update( + {block.labels[0].serialize(): block.serialize() for block in blocks} + ) + + if comments: + result["__comments__"] = comments + + return result + + +class BlockRule(LarkRule): + @staticmethod + def rule_name() -> str: + return "block" + + def __init__(self, children): + super().__init__(children) + *self._labels, self._body = children + + @property + def labels(self) -> List["IdentifierRule"]: + return list(filter(lambda label: label is not None, self._labels)) + + @property + def body(self) -> BodyRule: + return self._body + + def serialize(self) -> BodyRule: + result = self._body.serialize() + labels = self._labels + for label in reversed(labels[1:]): + result = {label.serialize(): result} + return result + + +class IdentifierRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "identifier" + + def __init__(self, children): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class IntLitRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "int_lit" + + def __init__(self, children): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class FloatLitRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "float_lit" + + def __init__(self, children): + print("float_lit", children) + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class StringLitRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "STRING_LIT" + + def serialize(self) -> Any: + return 
TokenSequence(self._children).joined()[1:-1] + + +class Expression(LarkRule, ABC): + @staticmethod + def rule_name() -> str: + return "expression" + + +class ExprTermRule(Expression): + @staticmethod + def rule_name() -> str: + return "expr_term" + + def __init__(self, children): + self._parentheses = False + if ( + isinstance(children[0], LarkToken) + and children[0].name == "LPAR" + and isinstance(children[-1], LarkToken) + and children[-1].name == "RPAR" + ): + self._parentheses = True + children = children[1:-1] + super().__init__(children) + + @property + def parentheses(self) -> bool: + return self._parentheses + + def serialize(self) -> Any: + result = self._children[0].serialize() + if self._parentheses: + result = wrap_into_parentheses(result) + result = to_dollar_string(result) + return result + + def tree(self) -> Tree: + tree = super().tree() + if self.parentheses: + return Tree( + tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] + ) + return tree + + +class ConditionalRule(ExprTermRule): + + _children: Tuple[ + Expression, + Expression, + Expression, + ] + + @staticmethod + def rule_name(): + return "conditional" + + @property + def condition(self) -> Expression: + return self._children[0] + + @property + def if_true(self) -> Expression: + return self._children[1] + + @property + def if_false(self) -> Expression: + return self._children[2] + + def __init__(self, children): + super().__init__(children) + + def serialize(self) -> Any: + result = f"{self.condition.serialize()} ? 
{self.if_true.serialize()} : {self.if_false.serialize()}" + return to_dollar_string(result) + + +class BinaryOperatorRule(LarkRule): + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "binary_operator" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + +class BinaryTermRule(LarkRule): + _children: Tuple[ + BinaryOperatorRule, + Optional["NewLineOrCommentRule"], + ExprTermRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_term" + + def __init__(self, children): + if len(children) == 2: + children.insert(1, None) + super().__init__(children) + + @property + def binary_operator(self) -> BinaryOperatorRule: + return self._children[0] + + @property + def comment(self) -> Optional["NewLineOrCommentRule"]: + return self._children[1] + + @property + def has_comment(self) -> bool: + return self.comment is not None + + @property + def expr_term(self) -> ExprTermRule: + return self._children[2] + + def serialize(self) -> Any: + return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + + +class UnaryOpRule(Expression): + _children: Tuple[LarkToken, ExprTermRule] + + @staticmethod + def rule_name() -> str: + return "unary_op" + + @property + def unary_operator(self) -> str: + return str(self._children[0]) + + @property + def expr_term(self): + return self._children[1] + + def serialize(self) -> Any: + return to_dollar_string(f"{self.unary_operator}{self.expr_term.serialize()}") + + +class BinaryOpRule(Expression): + _children: Tuple[ + ExprTermRule, + BinaryTermRule, + "NewLineOrCommentRule", + ] + + @staticmethod + def rule_name() -> str: + return "binary_op" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def binary_term(self) -> BinaryTermRule: + return self._children[1] + + def serialize(self) -> Any: + lhs = self.expr_term.serialize() + operator = self.binary_term.binary_operator.serialize() + rhs = 
self.binary_term.expr_term.serialize() + rhs = unwrap_dollar_string(rhs) + return to_dollar_string(f"{lhs} {operator} {rhs}") + + +class AttributeRule(LarkRule): + _children: Tuple[ + IdentifierRule, + EQ_Token, + Expression, + ] + + @staticmethod + def rule_name() -> str: + return "attribute" + + @property + def identifier(self) -> IdentifierRule: + return self._children[0] + + @property + def expression(self) -> Expression: + return self._children[2] + + def serialize(self) -> Any: + return {self.identifier.serialize(): self.expression.serialize()} + + +class NewLineOrCommentRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "new_line_or_comment" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + def actual_comments(self) -> Optional[List[str]]: + comment = self.serialize() + if comment == "\n": + return None + + comment = comment.strip() + comments = comment.split("\n") + + result = [] + for comment in comments: + if comment.startswith("//"): + comment = comment[2:] + + elif comment.startswith("#"): + comment = comment[1:] + + if comment != "": + result.append(comment.strip()) + + return result diff --git a/test/helpers/hcl2_helper.py b/test/helpers/hcl2_helper.py index 5acee1e7..c39ee7fb 100644 --- a/test/helpers/hcl2_helper.py +++ b/test/helpers/hcl2_helper.py @@ -3,7 +3,7 @@ from lark import Tree from hcl2.parser import parser -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer class Hcl2Helper: diff --git a/test/unit/test_dict_transformer.py b/test/unit/test_dict_transformer.py index 122332eb..baad5ba9 100644 --- a/test/unit/test_dict_transformer.py +++ b/test/unit/test_dict_transformer.py @@ -2,7 +2,7 @@ from unittest import TestCase -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer class TestDictTransformer(TestCase): From e39b42918b4f6dca5694bd836faa5ee649b8e560 Mon Sep 17 00:00:00 
2001 From: Kamil Kozik Date: Wed, 26 Mar 2025 21:28:54 +0100 Subject: [PATCH 02/45] reorganize code --- hcl2/rule_transformer.py | 101 ---- hcl2/rule_transformer/__init__.py | 0 hcl2/rule_transformer/json.py | 12 + hcl2/rule_transformer/rules/__init__.py | 0 hcl2/rule_transformer/rules/abstract.py | 93 ++++ hcl2/rule_transformer/rules/base.py | 122 +++++ hcl2/rule_transformer/rules/expression.py | 187 +++++++ hcl2/rule_transformer/rules/token_sequence.py | 63 +++ hcl2/rule_transformer/rules/whitespace.py | 36 ++ hcl2/rule_transformer/transformer.py | 118 +++++ hcl2/rule_transformer/utils.py | 23 + hcl2/serialization.py | 496 ------------------ 12 files changed, 654 insertions(+), 597 deletions(-) delete mode 100644 hcl2/rule_transformer.py create mode 100644 hcl2/rule_transformer/__init__.py create mode 100644 hcl2/rule_transformer/json.py create mode 100644 hcl2/rule_transformer/rules/__init__.py create mode 100644 hcl2/rule_transformer/rules/abstract.py create mode 100644 hcl2/rule_transformer/rules/base.py create mode 100644 hcl2/rule_transformer/rules/expression.py create mode 100644 hcl2/rule_transformer/rules/token_sequence.py create mode 100644 hcl2/rule_transformer/rules/whitespace.py create mode 100644 hcl2/rule_transformer/transformer.py create mode 100644 hcl2/rule_transformer/utils.py delete mode 100644 hcl2/serialization.py diff --git a/hcl2/rule_transformer.py b/hcl2/rule_transformer.py deleted file mode 100644 index 8f0b922a..00000000 --- a/hcl2/rule_transformer.py +++ /dev/null @@ -1,101 +0,0 @@ -# pylint: disable=missing-function-docstring,unused-argument -from typing import List, Union - -from lark import Transformer, Tree, Token -from lark.visitors import _Leaf_T, _Return_T, Discard - -from hcl2.serialization import ( - LarkRule, - LarkToken, - StartRule, - BodyRule, - BlockRule, - IdentifierRule, - IntLitRule, - FloatLitRule, - StringLitRule, - ExprTermRule, - ConditionalRule, - BinaryOpRule, - BinaryOperatorRule, - BinaryTermRule, - 
UnaryOpRule, - AttributeRule, - NewLineOrCommentRule, -) - -ArgsType = List[Union[Token, Tree]] - - -class RuleTransformer(Transformer): - """Takes a syntax tree generated by the parser and - transforms it to a tree of LarkRule instances - """ - - with_meta: bool - - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} - - def __init__(self, with_meta: bool = False, with_comments: bool = True): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self._with_meta = with_meta - self._with_comments = with_comments - super().__init__() - - def start(self, args: ArgsType) -> StartRule: - return StartRule(args) - - def body(self, args: ArgsType) -> BodyRule: - return BodyRule(args) - - def block(self, args: ArgsType) -> BlockRule: - return BlockRule(args) - - def identifier(self, args: ArgsType) -> IdentifierRule: - return IdentifierRule(args) - - def int_lit(self, args: ArgsType) -> IntLitRule: - return IntLitRule(args) - - def float_lit(self, args: ArgsType) -> FloatLitRule: - return FloatLitRule(args) - - def string_lit(self, args: ArgsType) -> StringLitRule: - return StringLitRule(args) - - def expr_term(self, args: ArgsType) -> ExprTermRule: - return ExprTermRule(args) - - def conditional(self, args: ArgsType) -> ConditionalRule: - return ConditionalRule(args) - - def binary_operator(self, args: ArgsType) -> BinaryOperatorRule: - return BinaryOperatorRule(args) - - def binary_term(self, args: ArgsType) -> BinaryTermRule: - return BinaryTermRule(args) - - def unary_op(self, args: ArgsType) -> UnaryOpRule: - return UnaryOpRule(args) - - def binary_op(self, args: ArgsType) -> BinaryOpRule: - return BinaryOpRule(args) - - def attribute(self, args: ArgsType) -> AttributeRule: - return AttributeRule(args) - - def new_line_or_comment(self, args: ArgsType) -> NewLineOrCommentRule: - if self._with_comments: - return 
NewLineOrCommentRule(args) - return Discard - - def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: - return super().transform(tree) - - def __default_token__(self, token: Token) -> LarkToken: - return LarkToken(token.type, token.value) diff --git a/hcl2/rule_transformer/__init__.py b/hcl2/rule_transformer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hcl2/rule_transformer/json.py b/hcl2/rule_transformer/json.py new file mode 100644 index 00000000..647b6683 --- /dev/null +++ b/hcl2/rule_transformer/json.py @@ -0,0 +1,12 @@ +from json import JSONEncoder +from typing import Any + +from hcl2.rule_transformer.rules.abstract import LarkRule + + +class LarkEncoder(JSONEncoder): + def default(self, obj: Any): + if isinstance(obj, LarkRule): + return obj.serialize() + else: + return super().default(obj) diff --git a/hcl2/rule_transformer/rules/__init__.py b/hcl2/rule_transformer/rules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py new file mode 100644 index 00000000..37f63a03 --- /dev/null +++ b/hcl2/rule_transformer/rules/abstract.py @@ -0,0 +1,93 @@ +from abc import ABC, abstractmethod +from typing import Any, Union, List, Optional + +from lark import Token, Tree +from lark.tree import Meta + + +class LarkElement(ABC): + @abstractmethod + def tree(self) -> Token: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + +class LarkToken(LarkElement): + def __init__(self, name: str, value: Union[str, int]): + self._name = name + self._value = value + + @property + def name(self) -> str: + return self._name + + @property + def value(self): + return self._value + + def serialize(self) -> Any: + return self._value + + def tree(self) -> Token: + return Token(self.name, self.value) + + def __str__(self) -> str: + return str(self._value) + + def __repr__(self) -> str: + return f"" + + 
+EQ_Token = LarkToken +COLON_TOKEN = LarkToken +LPAR_TOKEN = LarkToken # left parenthesis +RPAR_TOKEN = LarkToken # right parenthesis + + +class TokenSequence: + def __init__(self, tokens: List[LarkToken]): + self.tokens = tokens + + def tree(self) -> List[Token]: + return [token.tree() for token in self.tokens] + + def joined(self): + return "".join(str(token) for token in self.tokens) + + +class LarkRule(ABC): + @staticmethod + @abstractmethod + def rule_name() -> str: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + @property + def children(self) -> List[LarkElement]: + return self._children + + def tree(self) -> Tree: + result_children = [] + for child in self._children: + if child is None: + continue + + if isinstance(child, TokenSequence): + result_children.extend(child.tree()) + else: + result_children.append(child.tree()) + + return Tree(self.rule_name(), result_children) + + def __init__(self, children, meta: Optional[Meta] = None): + self._children = children + self._meta = meta + + def __repr__(self): + return f"" diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py new file mode 100644 index 00000000..f46d8039 --- /dev/null +++ b/hcl2/rule_transformer/rules/base.py @@ -0,0 +1,122 @@ +from typing import Tuple, Any, List, Union, Optional + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule, EQ_Token +from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.token_sequence import IdentifierRule + +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +class AttributeRule(LarkRule): + _children: Tuple[ + IdentifierRule, + EQ_Token, + Expression, + ] + + @staticmethod + def rule_name() -> str: + return "attribute" + + @property + def identifier(self) -> IdentifierRule: + return self._children[0] + + @property + def expression(self) -> Expression: + return 
self._children[2] + + def serialize(self) -> Any: + return {self.identifier.serialize(): self.expression.serialize()} + + +class BodyRule(LarkRule): + + _children: List[ + Union[ + NewLineOrCommentRule, + AttributeRule, + "BlockRule", + ] + ] + + @staticmethod + def rule_name() -> str: + return "body" + + def serialize(self) -> Any: + blocks: List[BlockRule] = [] + attributes: List[AttributeRule] = [] + comments = [] + + for child in self._children: + if isinstance(child, BlockRule): + blocks.append(child) + if isinstance(child, AttributeRule): + attributes.append(child) + if isinstance(child, NewLineOrCommentRule): + child_comments = child.actual_comments() + if child_comments: + comments.extend(child_comments) + + result = {} + + for attribute in attributes: + result.update( + {attribute.identifier.serialize(): attribute.expression.serialize()} + ) + + result.update( + {block.labels[0].serialize(): block.serialize() for block in blocks} + ) + + if comments: + result["__comments__"] = comments + + return result + + +class StartRule(LarkRule): + + _children: Tuple[BodyRule] + + @staticmethod + def rule_name() -> str: + return "start" + + @property + def body(self) -> BodyRule: + return self._children[0] + + def serialize(self) -> Any: + return self.body.serialize() + + +class BlockRule(LarkRule): + + _children: Tuple[BodyRule] + + @staticmethod + def rule_name() -> str: + return "block" + + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children) + *self._labels, self._body = children + + @property + def labels(self) -> List[IdentifierRule]: + return list(filter(lambda label: label is not None, self._labels)) + + @property + def body(self) -> BodyRule: + return self._body + + def serialize(self) -> BodyRule: + result = self._body.serialize() + labels = self._labels + for label in reversed(labels[1:]): + result = {label.serialize(): result} + return result diff --git a/hcl2/rule_transformer/rules/expression.py 
b/hcl2/rule_transformer/rules/expression.py new file mode 100644 index 00000000..2a38912a --- /dev/null +++ b/hcl2/rule_transformer/rules/expression.py @@ -0,0 +1,187 @@ +from abc import ABC +from typing import Any, Tuple, Optional, List + +from lark import Tree, Token +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import ( + LarkRule, + LarkToken, + LPAR_TOKEN, + RPAR_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rule_transformer.rules.token_sequence import BinaryOperatorRule +from hcl2.rule_transformer.utils import ( + wrap_into_parentheses, + to_dollar_string, + unwrap_dollar_string, +) + + +class Expression(LarkRule, ABC): + @staticmethod + def rule_name() -> str: + return "expression" + + +class ExprTermRule(Expression): + + _children: Tuple[ + Optional[LPAR_TOKEN], + Optional[NewLineOrCommentRule], + Expression, + Optional[NewLineOrCommentRule], + Optional[RPAR_TOKEN], + ] + + @staticmethod + def rule_name() -> str: + return "expr_term" + + def __init__(self, children, meta: Optional[Meta] = None): + self._parentheses = False + if ( + isinstance(children[0], LarkToken) + and children[0].name == "LPAR" + and isinstance(children[-1], LarkToken) + and children[-1].name == "RPAR" + ): + self._parentheses = True + children = children[1:-1] + super().__init__(children, meta) + + @property + def parentheses(self) -> bool: + return self._parentheses + + def serialize(self) -> Any: + result = self._children[0].serialize() + if self.parentheses: + result = wrap_into_parentheses(result) + result = to_dollar_string(result) + return result + + def tree(self) -> Tree: + tree = super().tree() + if self.parentheses: + return Tree( + tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] + ) + return tree + + +class ConditionalRule(LarkRule): + + _children: Tuple[ + Expression, + Expression, + Expression, + ] + + @staticmethod + def rule_name(): + return "conditional" + + @property + def 
condition(self) -> Expression: + return self._children[0] + + @property + def if_true(self) -> Expression: + return self._children[1] + + @property + def if_false(self) -> Expression: + return self._children[2] + + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children, meta) + + def serialize(self) -> Any: + result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" + return to_dollar_string(result) + + +class BinaryTermRule(LarkRule): + + _children: Tuple[ + BinaryOperatorRule, + Optional[NewLineOrCommentRule], + ExprTermRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_term" + + def __init__(self, children, meta: Optional[Meta] = None): + if len(children) == 2: + children.insert(1, None) + super().__init__(children, meta) + + @property + def binary_operator(self) -> BinaryOperatorRule: + return self._children[0] + + @property + def comment(self) -> Optional[NewLineOrCommentRule]: + return self._children[1] + + @property + def has_comment(self) -> bool: + return self.comment is not None + + @property + def expr_term(self) -> ExprTermRule: + return self._children[2] + + def serialize(self) -> Any: + return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + + +class BinaryOpRule(Expression): + _children: Tuple[ + ExprTermRule, + BinaryTermRule, + NewLineOrCommentRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_op" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def binary_term(self) -> BinaryTermRule: + return self._children[1] + + def serialize(self) -> Any: + lhs = self.expr_term.serialize() + operator = self.binary_term.binary_operator.serialize() + rhs = self.binary_term.expr_term.serialize() + rhs = unwrap_dollar_string(rhs) + return to_dollar_string(f"{lhs} {operator} {rhs}") + + +class UnaryOpRule(Expression): + + _children: Tuple[LarkToken, ExprTermRule] + + @staticmethod + def 
rule_name() -> str: + return "unary_op" + + @property + def operator(self) -> str: + return str(self._children[0]) + + @property + def expr_term(self): + return self._children[1] + + def serialize(self) -> Any: + return to_dollar_string(f"{self.operator}{self.expr_term.serialize()}") diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py new file mode 100644 index 00000000..66e22e2f --- /dev/null +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -0,0 +1,63 @@ +from abc import ABC +from typing import Tuple, Any, List, Optional + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken + + +class TokenSequenceRule(LarkRule, ABC): + + _children: Tuple[TokenSequence] + + def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class IdentifierRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "identifier" + + def serialize(self) -> str: + return str(super().serialize()) + + +class IntLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "int_lit" + + def serialize(self) -> float: + return int(super().serialize()) + + +class FloatLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "float_lit" + + def serialize(self) -> float: + return float(super().serialize()) + + +class StringLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "STRING_LIT" + + def serialize(self) -> str: + return str(super().serialize()) + + +class BinaryOperatorRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "binary_operator" + + def serialize(self) -> str: + return str(super().serialize()) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py new file mode 
100644 index 00000000..f56a386e --- /dev/null +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -0,0 +1,36 @@ +from typing import Optional, List, Any + +from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule + + +class NewLineOrCommentRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "new_line_or_comment" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + def actual_comments(self) -> Optional[List[str]]: + comment = self.serialize() + if comment == "\n": + return None + + comment = comment.strip() + comments = comment.split("\n") + + result = [] + for comment in comments: + if comment.startswith("//"): + comment = comment[2:] + + elif comment.startswith("#"): + comment = comment[1:] + + if comment != "": + result.append(comment.strip()) + + return result diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py new file mode 100644 index 00000000..9e6af6ef --- /dev/null +++ b/hcl2/rule_transformer/transformer.py @@ -0,0 +1,118 @@ +# pylint: disable=missing-function-docstring,unused-argument +from typing import List, Union + +from lark import Transformer, Tree, Token +from lark.tree import Meta +from lark.visitors import _Leaf_T, Discard, v_args + +from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule +from hcl2.rule_transformer.rules.base import ( + StartRule, + BodyRule, + BlockRule, + AttributeRule, +) +from hcl2.rule_transformer.rules.expression import ( + BinaryTermRule, + ConditionalRule, + ExprTermRule, + BinaryOpRule, + UnaryOpRule, +) +from hcl2.rule_transformer.rules.token_sequence import ( + IdentifierRule, + IntLitRule, + FloatLitRule, + StringLitRule, + BinaryOperatorRule, +) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +class RuleTransformer(Transformer): + """Takes a syntax tree generated by the parser and + transforms it to a tree of LarkRule instances + """ + + 
with_meta: bool + + @staticmethod + def is_type_keyword(value: str) -> bool: + return value in {"bool", "number", "string"} + + def __init__(self, with_meta: bool = False, with_comments: bool = True): + """ + :param with_meta: If set to true then adds `__start_line__` and `__end_line__` + parameters to the output dict. Default to false. + """ + self._with_meta = with_meta + self._with_comments = with_comments + super().__init__() + + def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: + return super().transform(tree) + + def __default_token__(self, token: Token) -> LarkToken: + return LarkToken(token.type, token.value) + + @v_args(meta=True) + def start(self, meta: Meta, args) -> StartRule: + return StartRule(args, meta) + + @v_args(meta=True) + def body(self, meta: Meta, args) -> BodyRule: + return BodyRule(args, meta) + + @v_args(meta=True) + def block(self, meta: Meta, args) -> BlockRule: + return BlockRule(args, meta) + + @v_args(meta=True) + def identifier(self, meta: Meta, args) -> IdentifierRule: + return IdentifierRule(args, meta) + + @v_args(meta=True) + def int_lit(self, meta: Meta, args) -> IntLitRule: + return IntLitRule(args, meta) + + @v_args(meta=True) + def float_lit(self, meta: Meta, args) -> FloatLitRule: + return FloatLitRule(args, meta) + + @v_args(meta=True) + def string_lit(self, meta: Meta, args) -> StringLitRule: + return StringLitRule(args, meta) + + @v_args(meta=True) + def expr_term(self, meta: Meta, args) -> ExprTermRule: + return ExprTermRule(args, meta) + + @v_args(meta=True) + def conditional(self, meta: Meta, args) -> ConditionalRule: + return ConditionalRule(args, meta) + + @v_args(meta=True) + def binary_operator(self, meta: Meta, args) -> BinaryOperatorRule: + return BinaryOperatorRule(args, meta) + + @v_args(meta=True) + def binary_term(self, meta: Meta, args) -> BinaryTermRule: + return BinaryTermRule(args, meta) + + @v_args(meta=True) + def unary_op(self, meta: Meta, args) -> UnaryOpRule: + return UnaryOpRule(args, meta) + + 
@v_args(meta=True) + def binary_op(self, meta: Meta, args) -> BinaryOpRule: + return BinaryOpRule(args, meta) + + @v_args(meta=True) + def attribute(self, meta: Meta, args) -> AttributeRule: + return AttributeRule(args, meta) + + @v_args(meta=True) + def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + if self._with_comments: + return NewLineOrCommentRule(args, meta) + return Discard diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py new file mode 100644 index 00000000..060d3b53 --- /dev/null +++ b/hcl2/rule_transformer/utils.py @@ -0,0 +1,23 @@ +def is_dollar_string(value: str) -> bool: + if not isinstance(value, str): + return False + return value.startswith("${") and value.endswith("}") + + +def to_dollar_string(value: str) -> str: + if not is_dollar_string(value): + return f"${{{value}}}" + return value + + +def unwrap_dollar_string(value: str) -> str: + if is_dollar_string(value): + return value[2:-1] + return value + + +def wrap_into_parentheses(value: str) -> str: + if is_dollar_string(value): + value = unwrap_dollar_string(value) + return to_dollar_string(f"({value})") + return f"({value})" diff --git a/hcl2/serialization.py b/hcl2/serialization.py deleted file mode 100644 index 15d75caa..00000000 --- a/hcl2/serialization.py +++ /dev/null @@ -1,496 +0,0 @@ -from abc import ABC, abstractmethod -from json import JSONEncoder -from typing import List, Any, Union, Tuple, Optional - -from lark import Tree, Token - -ArgsType = List["LarkElement"] - - -def is_dollar_string(value: str) -> bool: - return value.startswith("${") and value.endswith("}") - - -def to_dollar_string(value: str) -> str: - if not is_dollar_string(value): - return f"${{{value}}}" - return value - - -def unwrap_dollar_string(value: str) -> str: - if is_dollar_string(value): - return value[2:-1] - return value - - -def wrap_into_parentheses(value: str) -> str: - if is_dollar_string(value): - value = unwrap_dollar_string(value) - return 
to_dollar_string(f"({value})") - - return f"({value})" - - -class LarkEncoder(JSONEncoder): - def default(self, obj: Any): - if isinstance(obj, LarkRule): - return obj.serialize() - else: - return super().default(obj) - - -class LarkElement(ABC): - @abstractmethod - def tree(self) -> Token: - raise NotImplementedError() - - @abstractmethod - def serialize(self) -> Any: - raise NotImplementedError() - - -class LarkToken(LarkElement): - def __init__(self, name: str, value: Union[str, int]): - self._name = name - self._value = value - - @property - def name(self) -> str: - return self._name - - @property - def value(self): - return self._value - - def serialize(self) -> Any: - return self._value - - def tree(self) -> Token: - return Token(self.name, self.value) - - def __str__(self) -> str: - return str(self._value) - - def __repr__(self) -> str: - return f"" - - -EQ_Token = LarkToken - - -class TokenSequence: - def __init__(self, tokens: List[LarkToken]): - self.tokens = tokens - - def tree(self) -> List[Token]: - return [token.tree() for token in self.tokens] - - def joined(self): - return "".join(str(token) for token in self.tokens) - - -class LarkRule(ABC): - _classes = [] - - @staticmethod - @abstractmethod - def rule_name() -> str: - raise NotImplementedError() - - @abstractmethod - def serialize(self) -> Any: - raise NotImplementedError() - - def tree(self) -> Tree: - result_children = [] - for child in self._children: - if child is None: - continue - - if isinstance(child, TokenSequence): - result_children.extend(child.tree()) - else: - result_children.append(child.tree()) - - return Tree(self.rule_name(), result_children) - - def __init__(self, children): - self._children: List[LarkElement] = children - - def __init_subclass__(cls, **kwargs): - cls._classes.append(cls) - - def __repr__(self): - return f"" - - -class StartRule(LarkRule): - - _children: Tuple["BodyRule"] - - @staticmethod - def rule_name() -> str: - return "start" - - @property - def body(self) 
-> "BodyRule": - return self._children[0] - - def serialize(self) -> Any: - return self.body.serialize() - - -class BodyRule(LarkRule): - - _children: List[ - Union[ - "NewLineOrCommentRule", - "AttributeRule", - "BlockRule", - ] - ] - - @staticmethod - def rule_name() -> str: - return "body" - - def serialize(self) -> Any: - blocks: List[BlockRule] = [] - attributes: List[AttributeRule] = [] - comments = [] - - for child in self._children: - if isinstance(child, BlockRule): - blocks.append(child) - if isinstance(child, AttributeRule): - attributes.append(child) - if isinstance(child, NewLineOrCommentRule): - child_comments = child.actual_comments() - if child_comments: - comments.extend(child_comments) - - result = {} - - for attribute in attributes: - result.update( - {attribute.identifier.serialize(): attribute.expression.serialize()} - ) - - result.update( - {block.labels[0].serialize(): block.serialize() for block in blocks} - ) - - if comments: - result["__comments__"] = comments - - return result - - -class BlockRule(LarkRule): - @staticmethod - def rule_name() -> str: - return "block" - - def __init__(self, children): - super().__init__(children) - *self._labels, self._body = children - - @property - def labels(self) -> List["IdentifierRule"]: - return list(filter(lambda label: label is not None, self._labels)) - - @property - def body(self) -> BodyRule: - return self._body - - def serialize(self) -> BodyRule: - result = self._body.serialize() - labels = self._labels - for label in reversed(labels[1:]): - result = {label.serialize(): result} - return result - - -class IdentifierRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def rule_name() -> str: - return "identifier" - - def __init__(self, children): - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class IntLitRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def 
rule_name() -> str: - return "int_lit" - - def __init__(self, children): - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class FloatLitRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def rule_name() -> str: - return "float_lit" - - def __init__(self, children): - print("float_lit", children) - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class StringLitRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "STRING_LIT" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined()[1:-1] - - -class Expression(LarkRule, ABC): - @staticmethod - def rule_name() -> str: - return "expression" - - -class ExprTermRule(Expression): - @staticmethod - def rule_name() -> str: - return "expr_term" - - def __init__(self, children): - self._parentheses = False - if ( - isinstance(children[0], LarkToken) - and children[0].name == "LPAR" - and isinstance(children[-1], LarkToken) - and children[-1].name == "RPAR" - ): - self._parentheses = True - children = children[1:-1] - super().__init__(children) - - @property - def parentheses(self) -> bool: - return self._parentheses - - def serialize(self) -> Any: - result = self._children[0].serialize() - if self._parentheses: - result = wrap_into_parentheses(result) - result = to_dollar_string(result) - return result - - def tree(self) -> Tree: - tree = super().tree() - if self.parentheses: - return Tree( - tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] - ) - return tree - - -class ConditionalRule(ExprTermRule): - - _children: Tuple[ - Expression, - Expression, - Expression, - ] - - @staticmethod - def rule_name(): - return "conditional" - - @property - def condition(self) -> Expression: - return self._children[0] - - @property - def if_true(self) -> 
Expression: - return self._children[1] - - @property - def if_false(self) -> Expression: - return self._children[2] - - def __init__(self, children): - super().__init__(children) - - def serialize(self) -> Any: - result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" - return to_dollar_string(result) - - -class BinaryOperatorRule(LarkRule): - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "binary_operator" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined() - - -class BinaryTermRule(LarkRule): - _children: Tuple[ - BinaryOperatorRule, - Optional["NewLineOrCommentRule"], - ExprTermRule, - ] - - @staticmethod - def rule_name() -> str: - return "binary_term" - - def __init__(self, children): - if len(children) == 2: - children.insert(1, None) - super().__init__(children) - - @property - def binary_operator(self) -> BinaryOperatorRule: - return self._children[0] - - @property - def comment(self) -> Optional["NewLineOrCommentRule"]: - return self._children[1] - - @property - def has_comment(self) -> bool: - return self.comment is not None - - @property - def expr_term(self) -> ExprTermRule: - return self._children[2] - - def serialize(self) -> Any: - return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" - - -class UnaryOpRule(Expression): - _children: Tuple[LarkToken, ExprTermRule] - - @staticmethod - def rule_name() -> str: - return "unary_op" - - @property - def unary_operator(self) -> str: - return str(self._children[0]) - - @property - def expr_term(self): - return self._children[1] - - def serialize(self) -> Any: - return to_dollar_string(f"{self.unary_operator}{self.expr_term.serialize()}") - - -class BinaryOpRule(Expression): - _children: Tuple[ - ExprTermRule, - BinaryTermRule, - "NewLineOrCommentRule", - ] - - @staticmethod - def rule_name() -> str: - return "binary_op" - - @property - def expr_term(self) -> ExprTermRule: - return 
self._children[0] - - @property - def binary_term(self) -> BinaryTermRule: - return self._children[1] - - def serialize(self) -> Any: - lhs = self.expr_term.serialize() - operator = self.binary_term.binary_operator.serialize() - rhs = self.binary_term.expr_term.serialize() - rhs = unwrap_dollar_string(rhs) - return to_dollar_string(f"{lhs} {operator} {rhs}") - - -class AttributeRule(LarkRule): - _children: Tuple[ - IdentifierRule, - EQ_Token, - Expression, - ] - - @staticmethod - def rule_name() -> str: - return "attribute" - - @property - def identifier(self) -> IdentifierRule: - return self._children[0] - - @property - def expression(self) -> Expression: - return self._children[2] - - def serialize(self) -> Any: - return {self.identifier.serialize(): self.expression.serialize()} - - -class NewLineOrCommentRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "new_line_or_comment" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined() - - def actual_comments(self) -> Optional[List[str]]: - comment = self.serialize() - if comment == "\n": - return None - - comment = comment.strip() - comments = comment.split("\n") - - result = [] - for comment in comments: - if comment.startswith("//"): - comment = comment[2:] - - elif comment.startswith("#"): - comment = comment[1:] - - if comment != "": - result.append(comment.strip()) - - return result From d9c2eca1f99a7edf9b6e16603755c5113dc8a8d7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 2 Apr 2025 16:19:11 +0200 Subject: [PATCH 03/45] batch of different changes --- hcl2/rule_transformer/rules/abstract.py | 32 +++--- hcl2/rule_transformer/rules/base.py | 55 ++++++--- hcl2/rule_transformer/rules/expression.py | 104 +++++++++++------- hcl2/rule_transformer/rules/token_sequence.py | 31 +++--- hcl2/rule_transformer/rules/whitespace.py | 11 +- hcl2/rule_transformer/transformer.py | 12 +- hcl2/rule_transformer/utils.py | 9 ++ 7 files changed, 152 
insertions(+), 102 deletions(-) diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 37f63a03..6c650ea3 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -4,14 +4,16 @@ from lark import Token, Tree from lark.tree import Meta +from hcl2.rule_transformer.utils import SerializationOptions + class LarkElement(ABC): @abstractmethod - def tree(self) -> Token: + def reverse(self) -> Any: raise NotImplementedError() @abstractmethod - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: raise NotImplementedError() @@ -28,10 +30,10 @@ def name(self) -> str: def value(self): return self._value - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: return self._value - def tree(self) -> Token: + def reverse(self) -> Token: return Token(self.name, self.value) def __str__(self) -> str: @@ -47,45 +49,45 @@ def __repr__(self) -> str: RPAR_TOKEN = LarkToken # right parenthesis -class TokenSequence: +class TokenSequence(LarkElement): def __init__(self, tokens: List[LarkToken]): self.tokens = tokens - def tree(self) -> List[Token]: - return [token.tree() for token in self.tokens] + def reverse(self) -> List[Token]: + return [token.reverse() for token in self.tokens] - def joined(self): + def serialize(self, options: SerializationOptions = SerializationOptions()): return "".join(str(token) for token in self.tokens) -class LarkRule(ABC): +class LarkRule(LarkElement, ABC): @staticmethod @abstractmethod def rule_name() -> str: raise NotImplementedError() @abstractmethod - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: raise NotImplementedError() @property def children(self) -> List[LarkElement]: return self._children - def tree(self) -> Tree: + def reverse(self) -> Tree: result_children = [] for child in 
self._children: if child is None: continue if isinstance(child, TokenSequence): - result_children.extend(child.tree()) + result_children.extend(child.reverse()) else: - result_children.append(child.tree()) + result_children.append(child.reverse()) - return Tree(self.rule_name(), result_children) + return Tree(self.rule_name(), result_children, meta=self._meta) - def __init__(self, children, meta: Optional[Meta] = None): + def __init__(self, children: List, meta: Optional[Meta] = None): self._children = children self._meta = meta diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index f46d8039..76d014e9 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -1,3 +1,4 @@ +from collections import defaultdict from typing import Tuple, Any, List, Union, Optional from lark.tree import Meta @@ -7,6 +8,7 @@ from hcl2.rule_transformer.rules.token_sequence import IdentifierRule from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rule_transformer.utils import SerializationOptions class AttributeRule(LarkRule): @@ -28,8 +30,8 @@ def identifier(self) -> IdentifierRule: def expression(self) -> Expression: return self._children[2] - def serialize(self) -> Any: - return {self.identifier.serialize(): self.expression.serialize()} + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return {self.identifier.serialize(options): self.expression.serialize(options)} class BodyRule(LarkRule): @@ -46,18 +48,23 @@ class BodyRule(LarkRule): def rule_name() -> str: return "body" - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: blocks: List[BlockRule] = [] attributes: List[AttributeRule] = [] comments = [] - + inline_comments = [] for child in self._children: + if isinstance(child, BlockRule): blocks.append(child) + if isinstance(child, AttributeRule): attributes.append(child) + # collect 
in-line comments from attribute assignments, expressions etc + inline_comments.extend(child.expression.inline_comments()) + if isinstance(child, NewLineOrCommentRule): - child_comments = child.actual_comments() + child_comments = child.to_list() if child_comments: comments.extend(child_comments) @@ -65,15 +72,27 @@ def serialize(self) -> Any: for attribute in attributes: result.update( - {attribute.identifier.serialize(): attribute.expression.serialize()} + { + attribute.identifier.serialize( + options + ): attribute.expression.serialize(options) + } ) - result.update( - {block.labels[0].serialize(): block.serialize() for block in blocks} - ) + result_blocks = defaultdict(list) + for block in blocks: + name = block.labels[0].serialize(options) + if name in result.keys(): + raise RuntimeError(f"Attribute {name} is already defined.") + result_blocks[name].append(block.serialize(options)) + + result.update(**result_blocks) - if comments: - result["__comments__"] = comments + if options.with_comments: + if comments: + result["__comments__"] = comments + if inline_comments: + result["__inline_comments__"] = inline_comments return result @@ -90,8 +109,8 @@ def rule_name() -> str: def body(self) -> BodyRule: return self._children[0] - def serialize(self) -> Any: - return self.body.serialize() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.body.serialize(options) class BlockRule(LarkRule): @@ -103,7 +122,7 @@ def rule_name() -> str: return "block" def __init__(self, children, meta: Optional[Meta] = None): - super().__init__(children) + super().__init__(children, meta) *self._labels, self._body = children @property @@ -114,9 +133,11 @@ def labels(self) -> List[IdentifierRule]: def body(self) -> BodyRule: return self._body - def serialize(self) -> BodyRule: - result = self._body.serialize() + def serialize( + self, options: SerializationOptions = SerializationOptions() + ) -> BodyRule: + result = 
self._body.serialize(options) labels = self._labels for label in reversed(labels[1:]): - result = {label.serialize(): result} + result = {label.serialize(options): result} return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expression.py index 2a38912a..16daf310 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expression.py @@ -16,6 +16,7 @@ wrap_into_parentheses, to_dollar_string, unwrap_dollar_string, + SerializationOptions, ) @@ -24,10 +25,35 @@ class Expression(LarkRule, ABC): def rule_name() -> str: return "expression" + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children, meta) + + def inline_comments(self): + result = [] + for child in self._children: + + if isinstance(child, NewLineOrCommentRule): + result.extend(child.to_list()) + + elif isinstance(child, Expression): + result.extend(child.inline_comments()) + + return result + + def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + for index in indexes: + try: + child = children[index] + except IndexError: + children.insert(index, None) + else: + if not isinstance(child, NewLineOrCommentRule): + children.insert(index, None) + class ExprTermRule(Expression): - _children: Tuple[ + type_ = Tuple[ Optional[LPAR_TOKEN], Optional[NewLineOrCommentRule], Expression, @@ -35,6 +61,8 @@ class ExprTermRule(Expression): Optional[RPAR_TOKEN], ] + _children: type_ + @staticmethod def rule_name() -> str: return "expr_term" @@ -48,34 +76,36 @@ def __init__(self, children, meta: Optional[Meta] = None): and children[-1].name == "RPAR" ): self._parentheses = True - children = children[1:-1] + else: + children = [None, *children, None] + + self._possibly_insert_null_comments(children, [1, 3]) super().__init__(children, meta) @property def parentheses(self) -> bool: return self._parentheses - def serialize(self) -> Any: - result = self._children[0].serialize() + @property + 
def expression(self) -> Expression: + return self._children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = self.expression.serialize(options) if self.parentheses: result = wrap_into_parentheses(result) result = to_dollar_string(result) return result - def tree(self) -> Tree: - tree = super().tree() - if self.parentheses: - return Tree( - tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] - ) - return tree - -class ConditionalRule(LarkRule): +class ConditionalRule(Expression): _children: Tuple[ Expression, + Optional[NewLineOrCommentRule], Expression, + Optional[NewLineOrCommentRule], + Optional[NewLineOrCommentRule], Expression, ] @@ -83,27 +113,28 @@ class ConditionalRule(LarkRule): def rule_name(): return "conditional" + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 4]) + super().__init__(children, meta) + @property def condition(self) -> Expression: return self._children[0] @property def if_true(self) -> Expression: - return self._children[1] + return self._children[2] @property def if_false(self) -> Expression: - return self._children[2] + return self._children[5] - def __init__(self, children, meta: Optional[Meta] = None): - super().__init__(children, meta) - - def serialize(self) -> Any: - result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = f"{self.condition.serialize(options)} ? 
{self.if_true.serialize(options)} : {self.if_false.serialize(options)}" return to_dollar_string(result) -class BinaryTermRule(LarkRule): +class BinaryTermRule(Expression): _children: Tuple[ BinaryOperatorRule, @@ -116,28 +147,19 @@ def rule_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): - if len(children) == 2: - children.insert(1, None) + self._possibly_insert_null_comments(children, [1]) super().__init__(children, meta) @property def binary_operator(self) -> BinaryOperatorRule: return self._children[0] - @property - def comment(self) -> Optional[NewLineOrCommentRule]: - return self._children[1] - - @property - def has_comment(self) -> bool: - return self.comment is not None - @property def expr_term(self) -> ExprTermRule: return self._children[2] - def serialize(self) -> Any: - return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f"{self.binary_operator.serialize(options)} {self.expr_term.serialize(options)}" class BinaryOpRule(Expression): @@ -159,10 +181,14 @@ def expr_term(self) -> ExprTermRule: def binary_term(self) -> BinaryTermRule: return self._children[1] - def serialize(self) -> Any: - lhs = self.expr_term.serialize() - operator = self.binary_term.binary_operator.serialize() - rhs = self.binary_term.expr_term.serialize() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + lhs = self.expr_term.serialize(options) + operator = self.binary_term.binary_operator.serialize(options) + rhs = self.binary_term.expr_term.serialize(options) + # below line is to avoid dollar string nested inside another dollar string, e.g.: + # hcl2: 15 + (10 * 12) + # desired json: "${15 + (10 * 12)}" + # undesired json: "${15 + ${(10 * 12)}}" rhs = unwrap_dollar_string(rhs) return to_dollar_string(f"{lhs} {operator} {rhs}") @@ -183,5 +209,5 @@ def operator(self) -> str: def 
expr_term(self): return self._children[1] - def serialize(self) -> Any: - return to_dollar_string(f"{self.operator}{self.expr_term.serialize()}") + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return to_dollar_string(f"{self.operator}{self.expr_term.serialize(options)}") diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py index 66e22e2f..174e2510 100644 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -1,9 +1,10 @@ from abc import ABC -from typing import Tuple, Any, List, Optional +from typing import Tuple, Any, List, Optional, Type from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken +from hcl2.rule_transformer.utils import SerializationOptions class TokenSequenceRule(LarkRule, ABC): @@ -12,10 +13,13 @@ class TokenSequenceRule(LarkRule, ABC): def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): children = [TokenSequence(children)] - super().__init__(children) + super().__init__(children, meta) - def serialize(self) -> Any: - return self._children[0].joined() + def serialized_type(self) -> Type: + return str + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.serialized_type()(self._children[0].serialize(options)) class IdentifierRule(TokenSequenceRule): @@ -23,17 +27,14 @@ class IdentifierRule(TokenSequenceRule): def rule_name() -> str: return "identifier" - def serialize(self) -> str: - return str(super().serialize()) - class IntLitRule(TokenSequenceRule): @staticmethod def rule_name() -> str: return "int_lit" - def serialize(self) -> float: - return int(super().serialize()) + def serialized_type(self) -> Type: + return int class FloatLitRule(TokenSequenceRule): @@ -41,23 +42,19 @@ class FloatLitRule(TokenSequenceRule): def rule_name() -> str: return "float_lit" - def serialize(self) 
-> float: - return float(super().serialize()) + def serialized_type(self) -> Type: + return float class StringLitRule(TokenSequenceRule): @staticmethod def rule_name() -> str: + # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; + # nevertheless, try to change it to a rule in hcl2.lark return "STRING_LIT" - def serialize(self) -> str: - return str(super().serialize()) - class BinaryOperatorRule(TokenSequenceRule): @staticmethod def rule_name() -> str: return "binary_operator" - - def serialize(self) -> str: - return str(super().serialize()) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index f56a386e..b37cedc4 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -1,6 +1,7 @@ from typing import Optional, List, Any from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule +from hcl2.rule_transformer.utils import SerializationOptions class NewLineOrCommentRule(LarkRule): @@ -11,11 +12,13 @@ class NewLineOrCommentRule(LarkRule): def rule_name() -> str: return "new_line_or_comment" - def serialize(self) -> Any: - return TokenSequence(self._children).joined() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return TokenSequence(self._children).serialize(options) - def actual_comments(self) -> Optional[List[str]]: - comment = self.serialize() + def to_list( + self, options: SerializationOptions = SerializationOptions() + ) -> Optional[List[str]]: + comment = self.serialize(options) if comment == "\n": return None diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 9e6af6ef..1c7d6157 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -40,13 +40,7 @@ class RuleTransformer(Transformer): def is_type_keyword(value: str) -> bool: return value in {"bool", "number", "string"} - def 
__init__(self, with_meta: bool = False, with_comments: bool = True): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self._with_meta = with_meta - self._with_comments = with_comments + def __init__(self): super().__init__() def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: @@ -113,6 +107,4 @@ def attribute(self, meta: Meta, args) -> AttributeRule: @v_args(meta=True) def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: - if self._with_comments: - return NewLineOrCommentRule(args, meta) - return Discard + return NewLineOrCommentRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 060d3b53..e083d628 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,3 +1,12 @@ +from dataclasses import dataclass + + +@dataclass +class SerializationOptions: + with_comments: bool = True + with_meta: bool = False + + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): return False From 448ffd42050489eb92bbc5855a0905b04436c51f Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Fri, 4 Apr 2025 10:29:47 +0200 Subject: [PATCH 04/45] comments --- hcl2/rule_transformer/rules/whitespace.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index b37cedc4..96fe7c91 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -22,16 +22,19 @@ def to_list( if comment == "\n": return None - comment = comment.strip() comments = comment.split("\n") result = [] for comment in comments: - if comment.startswith("//"): - comment = comment[2:] + comment = comment.strip() - elif comment.startswith("#"): - comment = comment[1:] + for delimiter in ("//", "/*", "#"): + + if comment.startswith(delimiter): + comment = 
comment[len(delimiter) :] + + if comment.endswith("*/"): + comment = comment[:-2] if comment != "": result.append(comment.strip()) From 65f88bc3e7466b09108f4c0504c485d27e164558 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 2 Jul 2025 17:03:05 +0200 Subject: [PATCH 05/45] various changes --- hcl2/parser.py | 4 +- hcl2/rule_transformer/editor.py | 77 ++++++ hcl2/rule_transformer/hcl2.lark | 166 +++++++++++ hcl2/rule_transformer/processor.py | 258 ++++++++++++++++++ hcl2/rule_transformer/rules/abstract.py | 93 ++++--- hcl2/rule_transformer/rules/base.py | 53 ++-- hcl2/rule_transformer/rules/containers.py | 85 ++++++ hcl2/rule_transformer/rules/expression.py | 102 +++---- hcl2/rule_transformer/rules/indexing.py | 75 +++++ hcl2/rule_transformer/rules/literal_rules.py | 47 ++++ hcl2/rule_transformer/rules/strings.py | 50 ++++ hcl2/rule_transformer/rules/token_sequence.py | 116 ++++---- hcl2/rule_transformer/rules/tokens.py | 66 +++++ hcl2/rule_transformer/rules/tree.py | 106 +++++++ hcl2/rule_transformer/rules/whitespace.py | 46 +++- hcl2/rule_transformer/transformer.py | 103 +++++-- hcl2/rule_transformer/utils.py | 8 +- 17 files changed, 1232 insertions(+), 223 deletions(-) create mode 100644 hcl2/rule_transformer/editor.py create mode 100644 hcl2/rule_transformer/hcl2.lark create mode 100644 hcl2/rule_transformer/processor.py create mode 100644 hcl2/rule_transformer/rules/containers.py create mode 100644 hcl2/rule_transformer/rules/indexing.py create mode 100644 hcl2/rule_transformer/rules/literal_rules.py create mode 100644 hcl2/rule_transformer/rules/strings.py create mode 100644 hcl2/rule_transformer/rules/tokens.py create mode 100644 hcl2/rule_transformer/rules/tree.py diff --git a/hcl2/parser.py b/hcl2/parser.py index 79d50122..a0c87e34 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,7 +12,7 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "hcl2.lark", + 
@dataclasses.dataclass
class TreePathElement:
    """One step of a tree path: a rule name plus which same-named child to take."""

    # Lark rule name to look for among the direct children of the current node.
    name: str
    # Zero-based position among the direct children that carry that name.
    index: int = 0


@dataclasses.dataclass
class TreePath:
    """Ordered sequence of :class:`TreePathElement` steps, walked from the root."""

    elements: List[TreePathElement] = dataclasses.field(default_factory=list)

    @classmethod
    def build(cls, elements: "List[Tuple[str, Optional[int]] | str]") -> "TreePath":
        """Create a path from a compact description.

        Each item is either a bare rule name, a 1-tuple ``(name,)`` or a
        2-tuple ``(name, index)``. A missing or ``None`` index means ``0``.

        The annotation is quoted on purpose: the ``|`` operator between
        ``typing`` generics is evaluated at definition time when unquoted and
        is not supported before Python 3.10.

        Raises:
            TypeError: if a tuple item does not have one or two members.
        """
        results = []
        for element in elements:
            if isinstance(element, tuple):
                if not 1 <= len(element) <= 2:
                    raise TypeError(
                        f"path element must be (name,) or (name, index), got {element!r}"
                    )
                name = element[0]
                index = element[1] if len(element) == 2 else None
            else:
                name, index = element, None

            # An explicit None is allowed by the signature; it must not reach
            # the list lookup in Editor._find_one, so normalise it to 0 here.
            results.append(TreePathElement(name, 0 if index is None else index))

        return cls(results)

    def __iter__(self):
        return iter(self.elements)

    def __len__(self):
        return len(self.elements)


class Editor:
    """Locate rules inside a tree of ``LarkRule`` objects by following a ``TreePath``."""

    def __init__(self, rules_tree: "LarkRule"):
        self.rules_tree = rules_tree

    @staticmethod
    def _rule_name(rule: "LarkRule") -> str:
        """Return the Lark name of ``rule``.

        ``lark_name`` is declared as a property on some rule classes and as a
        static method on others, so both forms are accepted here.
        """
        name = rule.lark_name
        return name() if callable(name) else name

    @classmethod
    def _find_one(
        cls, rules_tree: "LarkRule", path_element: TreePathElement
    ) -> "LarkRule":
        """Return the ``path_element.index``-th direct child named ``path_element.name``.

        Raises:
            IndexError: if there is no such child.
        """
        matches = cls._find_all(rules_tree, path_element.name)
        try:
            return matches[path_element.index]
        except IndexError:
            raise IndexError(
                f"no child rule {path_element.name!r} at index {path_element.index}"
            ) from None

    @classmethod
    def _find_all(cls, rules_tree: "LarkRule", rule_name: str) -> List["LarkRule"]:
        """Return every direct child of ``rules_tree`` that is a rule named ``rule_name``."""
        return [
            child
            for child in rules_tree.children
            if isinstance(child, LarkRule) and cls._rule_name(child) == rule_name
        ]

    def find_by_path(self, path: TreePath, rule_name: str) -> List["LarkRule"]:
        """Walk ``path`` from the root, then return the matching children of the node reached.

        The path is only iterated, never modified, so no copy is needed.

        Raises:
            IndexError: if any step of the path does not exist.
        """
        current_rule = self.rules_tree
        for path_element in path:
            current_rule = self._find_one(current_rule, path_element)

        return self._find_all(current_rule, rule_name)
+ +// Punctuation +LPAR : "(" +RPAR : ")" +LBRACE : "{" +RBRACE : "}" +LSQB : "[" +RSQB : "]" +COMMA : "," +DOT : "." +EQ : /[ \t]*=(?!=|>)/ +COLON : ":" +DBLQUOTE : "\"" + +// Interpolation +INTERP_START : "${" + +// Splat Operators +ATTR_SPLAT : ".*" +FULL_SPLAT_START : "[*]" + +// Special Operators +FOR_OBJECT_ARROW : "=>" +ELLIPSIS : "..." +COLONS: "::" + +// Heredocs +HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ + +// Ignore whitespace (but not newlines, as they're significant in HCL) +%ignore /[ \t]+/ + +// ============================================================================ +// Rules +// ============================================================================ + +// Top-level structure +start : body + +// Body and basic constructs +body : (new_line_or_comment? (attribute | block))* new_line_or_comment? +attribute : identifier EQ expression +block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE + +// Whitespace and comments +new_line_or_comment: ( NL_OR_COMMENT )+ + +// Basic literals and identifiers +identifier: IDENTIFIER +int_lit: INT_LITERAL +float_lit: FLOAT_LITERAL +string: DBLQUOTE string_part* DBLQUOTE +string_part: STRING_CHARS + | ESCAPED_INTERPOLATION + | interpolation + +// Expressions +?expression : expr_term | operation | conditional +interpolation: INTERP_START expression RBRACE +conditional : expression QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression + +// Operations +?operation : unary_op | binary_op +!unary_op : (MINUS | NOT) expr_term +binary_op : expression binary_term new_line_or_comment? +binary_term : binary_operator new_line_or_comment? expression +!binary_operator : BINARY_OP + +// Expression terms +expr_term : LPAR new_line_or_comment? expression new_line_or_comment? 
RPAR + | float_lit + | int_lit + | string + | tuple + | object + | function_call + | index_expr_term + | get_attr_expr_term + | identifier + | provider_function_call + | heredoc_template + | heredoc_template_trim + | attr_splat_expr_term + | full_splat_expr_term + | for_tuple_expr + | for_object_expr + +// Collections +tuple : LSQB (new_line_or_comment* expression new_line_or_comment* COMMA)* (new_line_or_comment* expression)? new_line_or_comment* RSQB +object : LBRACE new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* RBRACE +object_elem : object_elem_key ( EQ | COLON ) expression +object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression +object_elem_key_expression : LPAR expression RPAR +object_elem_key_dot_accessor : identifier (DOT identifier)+ + +// Heredocs +heredoc_template : HEREDOC_TEMPLATE +heredoc_template_trim : HEREDOC_TEMPLATE_TRIM + +// Functions +function_call : identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment* COMMA new_line_or_comment* expression)* (COMMA | ELLIPSIS)? new_line_or_comment*) +provider_function_call: identifier COLONS identifier COLONS identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR + +// Indexing and attribute access +index_expr_term : expr_term index +get_attr_expr_term : expr_term get_attr +attr_splat_expr_term : expr_term attr_splat +full_splat_expr_term : expr_term full_splat +?index : braces_index | short_index +braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB +short_index : DOT INT_LITERAL +get_attr : DOT identifier +attr_splat : ATTR_SPLAT get_attr* +full_splat : FULL_SPLAT_START (get_attr | index)* + +// For expressions +!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? 
T = TypeVar("T", bound=LarkRule)


class RulesProcessor(Generic[T]):
    """Wrap one node of the rule tree and offer search, navigation and edit helpers.

    Search and navigation methods return new ``RulesProcessor`` objects that
    wrap the nodes they found; the wrapped node is available as ``.node``.
    """

    @staticmethod
    def _name_of(node: LarkRule) -> str:
        """Return the Lark name of ``node``.

        ``lark_name`` is declared as a property on some rule classes and as a
        static method on others, so both forms are accepted here.
        """
        name = node.lark_name
        return name() if callable(name) else name

    @classmethod
    def _traverse(
        cls,
        node: T,
        predicate: Callable[[T], bool],
        current_depth: int = 0,
        max_depth: Optional[int] = None,
    ) -> List["RulesProcessor"]:
        """Collect, depth first, processors for ``node`` and its descendants that satisfy ``predicate``.

        Only children that are ``LarkRule`` instances are descended into.
        Descent stops once ``current_depth`` reaches ``max_depth`` (when given);
        ``node`` itself is always tested.
        """
        results = []

        if predicate(node):
            results.append(cls(node))

        if max_depth is not None and current_depth >= max_depth:
            return results

        for child in node.children:
            if not isinstance(child, LarkRule):
                continue
            results.extend(
                cls._traverse(child, predicate, current_depth + 1, max_depth)
            )

        return results

    def __init__(self, node: LarkRule):
        self.node = node

    @property
    def siblings(self):
        """All children of the parent (this node included), or ``None`` for a root node."""
        if self.node.parent is None:
            return None
        return self.node.parent.children

    @property
    def next_siblings(self):
        """Children of the parent that come after this node, or ``None`` for a root node."""
        if self.node.parent is None:
            return None
        return self.node.parent.children[self.node.index + 1 :]

    @property
    def previous_siblings(self):
        """Children of the parent that come before this node, or ``None`` for a root node."""
        if self.node.parent is None:
            return None
        # The slice end is exclusive, so ``index`` (not ``index - 1``) is the
        # right bound: it keeps the nearest sibling and yields [] at index 0.
        return self.node.parent.children[: self.node.index]

    def walk(
        self,
    ) -> Generator[Tuple["RulesProcessor", List["RulesProcessor"]], None, None]:
        """Yield ``(processor, child_processors)`` for this node and, recursively, its rule children.

        ``child_processors`` wraps every child as stored (tokens and ``None``
        placeholders included); recursion only follows ``LarkRule`` children.
        """
        child_processors = [self.__class__(child) for child in self.node.children]
        yield self, child_processors
        for processor in child_processors:
            if isinstance(processor.node, LarkRule):
                yield from processor.walk()

    def find_block(
        self,
        labels: List[str],
        exact_match: bool = True,
        max_depth: Optional[int] = None,
    ) -> "RulesProcessor[BlockRule]":
        """Return the first block found by :meth:`find_blocks`.

        Raises:
            IndexError: if no block matches.
        """
        return self.find_blocks(labels, exact_match, max_depth)[0]

    def find_blocks(
        self,
        labels: List[str],
        exact_match: bool = True,
        max_depth: Optional[int] = None,
    ) -> List["RulesProcessor[BlockRule]"]:
        """Find blocks by their labels.

        Args:
            labels: Label strings to match against each block's serialized labels.
            exact_match: If True the block's labels must equal ``labels``.
                If False ``labels`` only has to be a prefix of the block's labels.
            max_depth: Maximum depth to search; ``None`` means unlimited.

        Returns:
            Processors wrapping every matching ``BlockRule``.
        """

        def block_predicate(node: LarkRule) -> bool:
            if not isinstance(node, BlockRule):
                return False

            node_labels = [label.serialize() for label in node.labels]
            if exact_match:
                return node_labels == labels
            # A longer ``labels`` can never be a prefix; the slice comparison
            # below is then False on its own.
            return node_labels[: len(labels)] == labels

        return cast(
            List[RulesProcessor[BlockRule]],
            self._traverse(self.node, block_predicate, max_depth=max_depth),
        )

    def attribute(
        self, name: str, max_depth: Optional[int] = None
    ) -> "RulesProcessor[AttributeRule]":
        """Return the first attribute found by :meth:`find_attributes`.

        Raises:
            IndexError: if no attribute matches.
        """
        return self.find_attributes(name, max_depth)[0]

    def find_attributes(
        self, name: str, max_depth: Optional[int] = None
    ) -> List["RulesProcessor[AttributeRule]"]:
        """Find attributes by their identifier name.

        Args:
            name: Attribute name to search for.
            max_depth: Maximum depth to search; ``None`` means unlimited.

        Returns:
            Processors wrapping every matching ``AttributeRule``.
        """

        def attribute_predicate(node: LarkRule) -> bool:
            if not isinstance(node, AttributeRule):
                return False
            return node.identifier.serialize() == name

        return self._traverse(self.node, attribute_predicate, max_depth=max_depth)

    def rule(self, rule_name: str, max_depth: Optional[int] = None):
        """Return the first rule found by :meth:`find_rules`.

        Raises:
            IndexError: if no rule matches.
        """
        return self.find_rules(rule_name, max_depth)[0]

    def find_rules(
        self, rule_name: str, max_depth: Optional[int] = None
    ) -> List["RulesProcessor"]:
        """Find all rules whose Lark name equals ``rule_name``.

        Args:
            rule_name: Name of the rule type to find.
            max_depth: Maximum depth to search; ``None`` means unlimited.

        Returns:
            Processors wrapping every matching rule.
        """

        def rule_predicate(node: LarkRule) -> bool:
            return self._name_of(node) == rule_name

        return self._traverse(self.node, rule_predicate, max_depth=max_depth)

    def find_by_predicate(
        self, predicate: Callable[[LarkRule], bool], max_depth: Optional[int] = None
    ) -> List["RulesProcessor"]:
        """Find all rules matching a custom predicate.

        Args:
            predicate: Function that returns True for nodes to collect.
            max_depth: Maximum depth to search; ``None`` means unlimited.

        Returns:
            Processors wrapping every matching rule.
        """
        # Must be passed by keyword: the third positional parameter of
        # _traverse is ``current_depth``, not ``max_depth``.
        return self._traverse(self.node, predicate, max_depth=max_depth)

    # Convenience methods
    def get_all_blocks(self, max_depth: Optional[int] = None) -> List:
        """Get all blocks in the tree."""
        return self.find_rules("block", max_depth)

    def get_all_attributes(
        self, max_depth: Optional[int] = None
    ) -> List["RulesProcessor"]:
        """Get all attributes in the tree."""
        return self.find_rules("attribute", max_depth)

    def previous(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]:
        """Get the nearest previous sibling that is a rule, or ``None`` if there is none.

        Args:
            skip_new_line: When True, ``NewLineOrCommentRule`` siblings are skipped.
        """
        if self.node.parent is None:
            return None

        for sibling in reversed(self.previous_siblings):
            if not isinstance(sibling, LarkRule):
                continue
            if skip_new_line and isinstance(sibling, NewLineOrCommentRule):
                continue
            return self.__class__(sibling)
        return None

    def next(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]:
        """Get the nearest following sibling that is a rule, or ``None`` if there is none.

        Args:
            skip_new_line: When True, ``NewLineOrCommentRule`` siblings are skipped.
        """
        if self.node.parent is None:
            return None

        for sibling in self.next_siblings:
            if not isinstance(sibling, LarkRule):
                continue
            if skip_new_line and isinstance(sibling, NewLineOrCommentRule):
                continue
            return self.__class__(sibling)
        return None

    def append_child(
        self, new_node: LarkRule, indentation: bool = True
    ) -> "RulesProcessor":
        """Append a deep copy of ``new_node`` to this node's children and return its processor.

        Args:
            new_node: Rule to copy and append; the caller's object is not modified.
            indentation: When True, a trailing ``NewLineOrCommentRule`` (if any)
                is replaced by a fresh newline node placed before the new child.
        """
        children = self.node.children
        if indentation:
            # ``children`` may be empty, so guard before peeking at the tail.
            if children and isinstance(children[-1], NewLineOrCommentRule):
                children.pop()
            # NOTE(review): the inserted newline node is not given a parent or
            # index here, unlike ``new_node`` below - confirm whether it should be.
            children.append(NewLineOrCommentRule.from_string("\n "))

        new_node = deepcopy(new_node)
        new_node.set_parent(self.node)
        new_node.set_index(len(children))
        children.append(new_node)
        return self.__class__(new_node)

    def replace(self, new_node: LarkRule) -> "RulesProcessor":
        """Put a deep copy of ``new_node`` in this node's slot in its parent and return its processor.

        Raises:
            ValueError: if the wrapped node has no parent to replace it in.
        """
        parent = self.node.parent
        if parent is None:
            raise ValueError("cannot replace a node that has no parent")

        new_node = deepcopy(new_node)
        position = self.node.index

        parent.children[position] = new_node
        new_node.set_parent(parent)
        new_node.set_index(position)
        return self.__class__(new_node)
serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self._value + def serialize(self, options: SerializationOptions = SerializationOptions()): + return self.serialize_conversion(self.value) - def reverse(self) -> Token: - return Token(self.name, self.value) + def to_lark(self) -> Token: + return Token(self.lark_name, self.value) def __str__(self) -> str: return str(self._value) def __repr__(self) -> str: - return f"" - - -EQ_Token = LarkToken -COLON_TOKEN = LarkToken -LPAR_TOKEN = LarkToken # left parenthesis -RPAR_TOKEN = LarkToken # right parenthesis - - -class TokenSequence(LarkElement): - def __init__(self, tokens: List[LarkToken]): - self.tokens = tokens - - def reverse(self) -> List[Token]: - return [token.reverse() for token in self.tokens] - - def serialize(self, options: SerializationOptions = SerializationOptions()): - return "".join(str(token) for token in self.tokens) + return f"" class LarkRule(LarkElement, ABC): - @staticmethod + @property @abstractmethod - def rule_name() -> str: + def lark_name(self) -> str: raise NotImplementedError() @abstractmethod @@ -74,22 +78,33 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A def children(self) -> List[LarkElement]: return self._children - def reverse(self) -> Tree: + @property + def parent(self): + return self._parent + + @property + def index(self): + return self._index + + def to_lark(self) -> Tree: result_children = [] for child in self._children: if child is None: continue - if isinstance(child, TokenSequence): - result_children.extend(child.reverse()) - else: - result_children.append(child.reverse()) + result_children.append(child.to_lark()) - return Tree(self.rule_name(), result_children, meta=self._meta) + return Tree(self.lark_name, result_children, meta=self._meta) - def __init__(self, children: List, meta: Optional[Meta] = None): + def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): + 
super().__init__() self._children = children self._meta = meta + for index, child in enumerate(children): + if child is not None: + child.set_index(index) + child.set_parent(self) + def __repr__(self): - return f"" + return f"" diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 76d014e9..6d0c4924 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,9 +3,9 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import LarkRule, EQ_Token +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken from hcl2.rule_transformer.rules.expression import Expression -from hcl2.rule_transformer.rules.token_sequence import IdentifierRule +from hcl2.rule_transformer.rules.tokens import IdentifierToken, EQ_TOKEN from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.utils import SerializationOptions @@ -13,17 +13,17 @@ class AttributeRule(LarkRule): _children: Tuple[ - IdentifierRule, - EQ_Token, + IdentifierToken, + EQ_TOKEN, Expression, ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "attribute" @property - def identifier(self) -> IdentifierRule: + def identifier(self) -> IdentifierToken: return self._children[0] @property @@ -39,13 +39,13 @@ class BodyRule(LarkRule): _children: List[ Union[ NewLineOrCommentRule, - AttributeRule, + # AttributeRule, "BlockRule", ] ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "body" def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: @@ -71,13 +71,7 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A result = {} for attribute in attributes: - result.update( - { - attribute.identifier.serialize( - options - ): attribute.expression.serialize(options) - } - ) + result.update(attribute.serialize(options)) result_blocks = defaultdict(list) for 
block in blocks: @@ -101,14 +95,14 @@ class StartRule(LarkRule): _children: Tuple[BodyRule] - @staticmethod - def rule_name() -> str: - return "start" - @property def body(self) -> BodyRule: return self._children[0] + @property + def lark_name(self) -> str: + return "start" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: return self.body.serialize(options) @@ -117,16 +111,19 @@ class BlockRule(LarkRule): _children: Tuple[BodyRule] - @staticmethod - def rule_name() -> str: - return "block" - def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) - *self._labels, self._body = children + + *self._labels, self._body = [ + child for child in children if not isinstance(child, LarkToken) + ] + + @property + def lark_name(self) -> str: + return "block" @property - def labels(self) -> List[IdentifierRule]: + def labels(self) -> List[IdentifierToken]: return list(filter(lambda label: label is not None, self._labels)) @property @@ -138,6 +135,6 @@ def serialize( ) -> BodyRule: result = self._body.serialize(options) labels = self._labels - for label in reversed(labels[1:]): + for label in reversed(labels): result = {label.serialize(options): result} return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py new file mode 100644 index 00000000..c39f3ba2 --- /dev/null +++ b/hcl2/rule_transformer/rules/containers.py @@ -0,0 +1,85 @@ +from typing import Tuple, List, Optional, Union, Any + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.literal_rules import ( + FloatLitRule, + IntLitRule, + IdentifierRule, +) +from hcl2.rule_transformer.rules.strings import StringRule +from hcl2.rule_transformer.rules.tokens import ( + COLON_TOKEN, + EQ_TOKEN, + LBRACE_TOKEN, + COMMA_TOKEN, + RBRACE_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import ( + 
class ObjectElemKeyRule(LarkRule):
    """Key of an object element; wraps exactly one literal, identifier or string rule."""

    _children: Tuple[Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule]]

    @staticmethod
    def lark_name() -> str:
        return "object_elem_key"

    def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any:
        """Return the serialized form of the single wrapped child."""
        return self.children[0].serialize(options=options)


class ObjectElemRule(LarkRule):
    """One ``key = value`` / ``key : value`` entry of an object."""

    _children: Tuple[
        ObjectElemKeyRule,
        Union[EQ_TOKEN, COLON_TOKEN],
        Expression,
    ]

    @staticmethod
    def lark_name() -> str:
        return "object_elem"

    @property
    def key(self) -> ObjectElemKeyRule:
        return self.children[0]

    @property
    def expression(self):
        return self.children[2]

    def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any:
        """Return a one-entry dict mapping the serialized key to the serialized value."""
        # ``options`` is passed by keyword because not every ``serialize``
        # takes it as the first positional parameter.
        return {
            self.key.serialize(options=options): self.expression.serialize(
                options=options
            )
        }


class ObjectRule(InlineCommentMixIn):
    """Object literal: braces around any number of elements, commas and newlines/comments."""

    _children: Tuple[
        LBRACE_TOKEN,
        Optional[NewLineOrCommentRule],
        Tuple[Union[ObjectElemRule, Optional[COMMA_TOKEN], NewLineOrCommentRule], ...],
        RBRACE_TOKEN,
    ]

    @staticmethod
    def lark_name() -> str:
        return "object"

    @property
    def elements(self) -> List[ObjectElemRule]:
        """The ``ObjectElemRule`` children found between the two braces, in order."""
        return [
            child for child in self.children[1:-1] if isinstance(child, ObjectElemRule)
        ]

    def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any:
        """Merge the serialized elements into one dict; a later duplicate key wins."""
        result = {}
        for element in self.elements:
            # Forward the caller's options so nested values are serialized
            # with the same settings instead of the defaults.
            result.update(element.serialize(options=options))
        return result
import Meta from hcl2.rule_transformer.rules.abstract import ( - LarkRule, LarkToken, - LPAR_TOKEN, - RPAR_TOKEN, ) -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.rules.token_sequence import BinaryOperatorRule +from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule +from hcl2.rule_transformer.rules.tokens import LPAR_TOKEN, RPAR_TOKEN, QMARK_TOKEN, COLON_TOKEN +from hcl2.rule_transformer.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) from hcl2.rule_transformer.utils import ( wrap_into_parentheses, to_dollar_string, @@ -20,36 +21,14 @@ ) -class Expression(LarkRule, ABC): - @staticmethod - def rule_name() -> str: +class Expression(InlineCommentMixIn, ABC): + @property + def lark_name(self) -> str: return "expression" def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) - def inline_comments(self): - result = [] - for child in self._children: - - if isinstance(child, NewLineOrCommentRule): - result.extend(child.to_list()) - - elif isinstance(child, Expression): - result.extend(child.inline_comments()) - - return result - - def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): - for index in indexes: - try: - child = children[index] - except IndexError: - children.insert(index, None) - else: - if not isinstance(child, NewLineOrCommentRule): - children.insert(index, None) - class ExprTermRule(Expression): @@ -63,17 +42,17 @@ class ExprTermRule(Expression): _children: type_ - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = False if ( isinstance(children[0], LarkToken) - and children[0].name == "LPAR" + and children[0].lark_name == "LPAR" and isinstance(children[-1], LarkToken) - and children[-1].name == "RPAR" + and children[-1].lark_name == "RPAR" ): self._parentheses = 
True else: @@ -90,11 +69,14 @@ def parentheses(self) -> bool: def expression(self) -> Expression: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize(self , unwrap: bool = False, options: SerializationOptions = SerializationOptions()) -> Any: result = self.expression.serialize(options) if self.parentheses: result = wrap_into_parentheses(result) result = to_dollar_string(result) + + if options.unwrap_dollar_string: + result = unwrap_dollar_string(result) return result @@ -102,19 +84,21 @@ class ConditionalRule(Expression): _children: Tuple[ Expression, + QMARK_TOKEN, Optional[NewLineOrCommentRule], Expression, Optional[NewLineOrCommentRule], + COLON_TOKEN, Optional[NewLineOrCommentRule], Expression, ] - @staticmethod - def rule_name(): + @property + def lark_name(self) -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 4]) + self._possibly_insert_null_comments(children, [2, 4, 6]) super().__init__(children, meta) @property @@ -123,13 +107,15 @@ def condition(self) -> Expression: @property def if_true(self) -> Expression: - return self._children[2] + return self._children[3] @property def if_false(self) -> Expression: - return self._children[5] + return self._children[7] def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + options = options.replace(unwrap_dollar_string=True) + print(self.condition) result = f"{self.condition.serialize(options)} ? 
{self.if_true.serialize(options)} : {self.if_false.serialize(options)}" return to_dollar_string(result) @@ -142,8 +128,8 @@ class BinaryTermRule(Expression): ExprTermRule, ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -166,11 +152,11 @@ class BinaryOpRule(Expression): _children: Tuple[ ExprTermRule, BinaryTermRule, - NewLineOrCommentRule, + Optional[NewLineOrCommentRule], ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "binary_op" @property @@ -182,23 +168,23 @@ def binary_term(self) -> BinaryTermRule: return self._children[1] def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - lhs = self.expr_term.serialize(options) - operator = self.binary_term.binary_operator.serialize(options) - rhs = self.binary_term.expr_term.serialize(options) - # below line is to avoid dollar string nested inside another dollar string, e.g.: - # hcl2: 15 + (10 * 12) - # desired json: "${15 + (10 * 12)}" - # undesired json: "${15 + ${(10 * 12)}}" - rhs = unwrap_dollar_string(rhs) - return to_dollar_string(f"{lhs} {operator} {rhs}") + children_options = options.replace(unwrap_dollar_string=True) + lhs = self.expr_term.serialize(children_options) + operator = self.binary_term.binary_operator.serialize(children_options) + rhs = self.binary_term.expr_term.serialize(children_options) + + result = f"{lhs} {operator} {rhs}" + if options.unwrap_dollar_string: + return result + return to_dollar_string(result) class UnaryOpRule(Expression): _children: Tuple[LarkToken, ExprTermRule] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "unary_op" @property diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py new file mode 100644 index 00000000..ce23d040 --- /dev/null +++ b/hcl2/rule_transformer/rules/indexing.py @@ -0,0 +1,75 
@@ +from typing import List, Optional, Tuple, Any + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import ExprTermRule, Expression +from hcl2.rule_transformer.rules.tokens import ( + DOT_TOKEN, + IntToken, + LSQB_TOKEN, + RSQB_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import ( + InlineCommentMixIn, + NewLineOrCommentRule, +) +from hcl2.rule_transformer.utils import SerializationOptions, to_dollar_string + + +class ShortIndexRule(LarkRule): + + _children: Tuple[ + DOT_TOKEN, + IntToken, + ] + + @property + def lark_name(self) -> str: + return "short_index" + + @property + def index(self): + return self.children[1] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f".{self.index.serialize(options)}" + + +class SqbIndex(InlineCommentMixIn): + _children: Tuple[ + LSQB_TOKEN, + Optional[NewLineOrCommentRule], + ExprTermRule, + Optional[NewLineOrCommentRule], + RSQB_TOKEN, + ] + + @property + def lark_name(self) -> str: + return "braces_index" + + @property + def index_expression(self): + return self.children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f"[{self.index_expression.serialize(options)}]" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3]) + super().__init__(children, meta) + + +class IndexExprTermRule(Expression): + + _children: Tuple[ExprTermRule, SqbIndex] + + @property + def lark_name(self) -> str: + return "index_expr_term" + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return to_dollar_string( + f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + ) diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py new file mode 100644 index 00000000..06ca99ae --- /dev/null +++ 
b/hcl2/rule_transformer/rules/literal_rules.py @@ -0,0 +1,47 @@ +from abc import ABC +from typing import Any, Tuple + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken +from hcl2.rule_transformer.utils import SerializationOptions + + +class TokenRule(LarkRule, ABC): + + _children: Tuple[LarkToken] + + @property + def token(self) -> LarkToken: + return self._children[0] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.token.serialize() + + +class IdentifierRule(TokenRule): + @property + def lark_name(self) -> str: + return "identifier" + + +class IntLitRule(TokenRule): + @property + def lark_name(self) -> str: + return "int_lit" + + +class FloatLitRule(TokenRule): + @property + def lark_name(self) -> str: + return "float_lit" + + +class StringPartRule(TokenRule): + @property + def lark_name(self) -> str: + return "string" + + +class BinaryOperatorRule(TokenRule): + @property + def lark_name(self) -> str: + return "binary_operator" diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py new file mode 100644 index 00000000..0f53c55a --- /dev/null +++ b/hcl2/rule_transformer/rules/strings.py @@ -0,0 +1,50 @@ +from typing import Tuple, Optional, List, Any, Union + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import Expression, ExprTermRule +from hcl2.rule_transformer.rules.literal_rules import StringPartRule +from hcl2.rule_transformer.rules.tokens import ( + INTERP_START_TOKEN, + RBRACE_TOKEN, + DBLQUOTE_TOKEN, + STRING_CHARS_TOKEN, +) +from hcl2.rule_transformer.utils import SerializationOptions + + +class StringRule(LarkRule): + + _children: Tuple[DBLQUOTE_TOKEN, List[StringPartRule], DBLQUOTE_TOKEN] + + @property + def lark_name(self) -> str: + return "string" + + @property + def string_parts(self): + return self.children[1:-1] + + def serialize(self, options: 
SerializationOptions = SerializationOptions()) -> Any: + return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + + +class InterpolationRule(LarkRule): + + _children: Tuple[ + INTERP_START_TOKEN, + Expression, + RBRACE_TOKEN, + ] + + @property + def lark_name(self) -> str: + return "interpolation" + + @property + def expression(self): + return self.children[1] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return "${" + self.expression.serialize(options) + "}" diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py index 174e2510..66d780b3 100644 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -1,60 +1,56 @@ -from abc import ABC -from typing import Tuple, Any, List, Optional, Type - -from lark.tree import Meta - -from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions - - -class TokenSequenceRule(LarkRule, ABC): - - _children: Tuple[TokenSequence] - - def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): - children = [TokenSequence(children)] - super().__init__(children, meta) - - def serialized_type(self) -> Type: - return str - - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self.serialized_type()(self._children[0].serialize(options)) - - -class IdentifierRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "identifier" - - -class IntLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "int_lit" - - def serialized_type(self) -> Type: - return int - - -class FloatLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "float_lit" - - def serialized_type(self) -> Type: - return float - - -class StringLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: 
- # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; - # nevertheless, try to change it to a rule in hcl2.lark - return "STRING_LIT" - - -class BinaryOperatorRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "binary_operator" +# from abc import ABC +# from typing import Tuple, Any, List, Optional, Type +# +# from lark.tree import Meta +# +# from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken +# from hcl2.rule_transformer.utils import SerializationOptions +# +# +# class TokenSequenceRule(LarkRule, ABC): +# +# _children: Tuple[TokenSequence] +# +# def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): +# children = [TokenSequence(children)] +# super().__init__(children, meta) +# +# def serialized_type(self) -> Type: +# return str +# +# def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: +# return self.serialized_type()(self._children[0].serialize(options)) +# +# +# class IdentifierRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "identifier" +# +# +# class IntLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "int_lit" +# +# def serialized_type(self) -> Type: +# return int +# +# +# class FloatLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "float_lit" +# +# def serialized_type(self) -> Type: +# return float +# +# +# class StringLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; +# # nevertheless, try to change it to a rule in the grammar +# return "STRING_LIT" +# +# diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py new file mode 100644 index 00000000..18e1ac07 --- /dev/null +++ b/hcl2/rule_transformer/rules/tokens.py @@ -0,0 +1,66 @@ +from typing import Callable, Any + +from 
hcl2.rule_transformer.rules.abstract import LarkToken + + +class StringToken(LarkToken): + def __init__(self, name: str, value: Any): + super().__init__(value) + self._name = name + + @property + def lark_name(self) -> str: + return self._name + + @property + def serialize_conversion(self) -> Callable: + return str + + +# explicitly define various kinds of string-based tokens +STRING_CHARS_TOKEN = StringToken +ESCAPED_INTERPOLATION_TOKEN = StringToken +BINARY_OP_TOKEN = StringToken +EQ_TOKEN = StringToken +COLON_TOKEN = StringToken +LPAR_TOKEN = StringToken # ( +RPAR_TOKEN = StringToken # ) +LBRACE_TOKEN = StringToken # { +RBRACE_TOKEN = StringToken # } +DOT_TOKEN = StringToken +COMMA_TOKEN = StringToken +QMARK_TOKEN = StringToken +LSQB_TOKEN = StringToken # [ +RSQB_TOKEN = StringToken # ] +INTERP_START_TOKEN = StringToken # ${ +DBLQUOTE_TOKEN = StringToken # " + + +class IdentifierToken(LarkToken): + @property + def lark_name(self) -> str: + return "IDENTIFIER" + + @property + def serialize_conversion(self) -> Callable: + return str + + +class IntToken(LarkToken): + @property + def lark_name(self) -> str: + return "INT_LITERAL" + + @property + def serialize_conversion(self) -> Callable: + return int + + +class FloatToken(LarkToken): + @property + def lark_name(self) -> str: + return "FLOAT_LITERAL" + + @property + def serialize_conversion(self) -> Callable: + return float diff --git a/hcl2/rule_transformer/rules/tree.py b/hcl2/rule_transformer/rules/tree.py new file mode 100644 index 00000000..e39d2077 --- /dev/null +++ b/hcl2/rule_transformer/rules/tree.py @@ -0,0 +1,106 @@ +from abc import ABC, abstractmethod +from typing import List, Optional, Any, Union + + +class LarkNode(ABC): + """Base class for all nodes in the tree""" + + def __init__(self, index: int = -1, parent: Optional["Node"] = None): + self._index = index + self._parent = parent + + @property + def parent(self) -> Optional["Node"]: + return self._parent + + @property + def index(self) -> int: + 
return self._index + + def set_parent(self, parent: "Node"): + self._parent = parent + + def set_index(self, index: int): + self._index = index + + @abstractmethod + def serialize(self, options=None) -> Any: + pass + + @abstractmethod + def to_lark(self) -> Any: + """Convert back to Lark representation""" + pass + + def is_leaf(self) -> bool: + """Check if this is a leaf node (atomic token)""" + return isinstance(self, LeafNode) + + def is_sequence(self) -> bool: + """Check if this is a token sequence node""" + return isinstance(self, SequenceNode) + + def is_internal(self) -> bool: + """Check if this is an internal node (grammar rule)""" + return isinstance(self, InternalNode) + + def is_atomic(self) -> bool: + """Check if this represents an atomic value (leaf or sequence)""" + return self.is_leaf() or self.is_sequence() + + +class LarkLeaf(Node, ABC): + """""" + + def __init__(self, value: Any, index: int = -1, parent: Optional[TreeNode] = None): + super().__init__(index, parent) + self._value = value + + @property + def value(self) -> Any: + return self._value + + def serialize(self, options=None) -> Any: + return self._value + + +class InternalNode(Node): + def __init__( + self, children: List[Node], index: int = -1, parent: Optional[Node] = None + ): + super().__init__(index, parent) + self._children = children or [] + + # Set parent and index for all children + for i, child in enumerate(self._children): + if child is not None: + child.set_parent(self) + child.set_index(i) + + @property + def children(self) -> List[Node]: + return self._children + + def add_child(self, child: Node): + """Add a child to this internal node""" + child.set_parent(self) + child.set_index(len(self._children)) + self._children.append(child) + + def remove_child(self, index: int) -> Optional[Node]: + """Remove child at given index""" + if 0 <= index < len(self._children): + child = self._children.pop(index) + if child: + child.set_parent(None) + # Update indices for remaining children 
+ for i in range(index, len(self._children)): + if self._children[i]: + self._children[i].set_index(i) + return child + return None + + @abstractmethod + def rule_name(self) -> str: + """The name of the grammar rule this represents""" + pass diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index 96fe7c91..65d5dd9c 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -1,19 +1,19 @@ -from typing import Optional, List, Any +from abc import ABC +from typing import Optional, List, Any, Tuple -from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule +from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule +from hcl2.rule_transformer.rules.literal_rules import TokenRule from hcl2.rule_transformer.utils import SerializationOptions -class NewLineOrCommentRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: +class NewLineOrCommentRule(TokenRule): + @property + def lark_name(self) -> str: return "new_line_or_comment" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return TokenSequence(self._children).serialize(options) + @classmethod + def from_string(cls, string: str) -> "NewLineOrCommentRule": + return cls([LarkToken("NL_OR_COMMENT", string)]) def to_list( self, options: SerializationOptions = SerializationOptions() @@ -40,3 +40,29 @@ def to_list( result.append(comment.strip()) return result + + +class InlineCommentMixIn(LarkRule, ABC): + def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + for index in indexes: + try: + child = children[index] + except IndexError: + children.insert(index, None) + else: + if not isinstance(child, NewLineOrCommentRule): + children.insert(index, None) + + def inline_comments(self): + result = [] + for child in self._children: + + if isinstance(child, NewLineOrCommentRule): + comments = 
child.to_list() + if comments is not None: + result.extend(comments) + + elif isinstance(child, InlineCommentMixIn): + result.extend(child.inline_comments()) + + return result diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 1c7d6157..31e88d61 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -1,30 +1,45 @@ # pylint: disable=missing-function-docstring,unused-argument from typing import List, Union -from lark import Transformer, Tree, Token +from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta -from lark.visitors import _Leaf_T, Discard, v_args -from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule from hcl2.rule_transformer.rules.base import ( StartRule, BodyRule, BlockRule, AttributeRule, ) +from hcl2.rule_transformer.rules.containers import ( + ObjectRule, + ObjectElemRule, + ObjectElemKeyRule, +) from hcl2.rule_transformer.rules.expression import ( BinaryTermRule, - ConditionalRule, - ExprTermRule, - BinaryOpRule, UnaryOpRule, + BinaryOpRule, + ExprTermRule, + ConditionalRule, ) -from hcl2.rule_transformer.rules.token_sequence import ( - IdentifierRule, - IntLitRule, +from hcl2.rule_transformer.rules.indexing import ( + IndexExprTermRule, + SqbIndex, + ShortIndexRule, +) +from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, - StringLitRule, + IntLitRule, + IdentifierRule, BinaryOperatorRule, + StringPartRule, +) +from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule +from hcl2.rule_transformer.rules.tokens import ( + IdentifierToken, + StringToken, + IntToken, + FloatToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -36,18 +51,24 @@ class RuleTransformer(Transformer): with_meta: bool - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} + def transform(self, tree: Tree) -> StartRule: + return 
super().transform(tree) - def __init__(self): + def __init__(self, discard_new_line_or_comments: bool = False): super().__init__() + self.discard_new_line_or_comments = discard_new_line_or_comments - def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: - return super().transform(tree) + def __default_token__(self, token: Token) -> StringToken: + return StringToken(token.type, token.value) + + def IDENTIFIER(self, token: Token) -> IdentifierToken: + return IdentifierToken(token.value) - def __default_token__(self, token: Token) -> LarkToken: - return LarkToken(token.type, token.value) + def INT_LITERAL(self, token: Token) -> IntToken: + return IntToken(token.value) + + def FLOAT_LITERAL(self, token: Token) -> FloatToken: + return FloatToken(token.value) @v_args(meta=True) def start(self, meta: Meta, args) -> StartRule: @@ -61,6 +82,16 @@ def body(self, meta: Meta, args) -> BodyRule: def block(self, meta: Meta, args) -> BlockRule: return BlockRule(args, meta) + @v_args(meta=True) + def attribute(self, meta: Meta, args) -> AttributeRule: + return AttributeRule(args, meta) + + @v_args(meta=True) + def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + if self.discard_new_line_or_comments: + return Discard + return NewLineOrCommentRule(args, meta) + @v_args(meta=True) def identifier(self, meta: Meta, args) -> IdentifierRule: return IdentifierRule(args, meta) @@ -74,8 +105,16 @@ def float_lit(self, meta: Meta, args) -> FloatLitRule: return FloatLitRule(args, meta) @v_args(meta=True) - def string_lit(self, meta: Meta, args) -> StringLitRule: - return StringLitRule(args, meta) + def string(self, meta: Meta, args) -> StringRule: + return StringRule(args, meta) + + @v_args(meta=True) + def string_part(self, meta: Meta, args) -> StringPartRule: + return StringPartRule(args, meta) + + @v_args(meta=True) + def interpolation(self, meta: Meta, args) -> InterpolationRule: + return InterpolationRule(args, meta) @v_args(meta=True) def expr_term(self, meta: Meta, 
args) -> ExprTermRule: @@ -102,9 +141,25 @@ def binary_op(self, meta: Meta, args) -> BinaryOpRule: return BinaryOpRule(args, meta) @v_args(meta=True) - def attribute(self, meta: Meta, args) -> AttributeRule: - return AttributeRule(args, meta) + def object(self, meta: Meta, args) -> ObjectRule: + return ObjectRule(args, meta) @v_args(meta=True) - def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: - return NewLineOrCommentRule(args, meta) + def object_elem(self, meta: Meta, args) -> ObjectElemRule: + return ObjectElemRule(args, meta) + + @v_args(meta=True) + def object_elem_key(self, meta: Meta, args) -> ObjectElemKeyRule: + return ObjectElemKeyRule(args, meta) + + @v_args(meta=True) + def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: + return IndexExprTermRule(args, meta) + + @v_args(meta=True) + def braces_index(self, meta: Meta, args) -> SqbIndex: + return SqbIndex(args, meta) + + @v_args(meta=True) + def short_index(self, meta: Meta, args) -> ShortIndexRule: + return ShortIndexRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index e083d628..6a6ed661 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,11 +1,15 @@ -from dataclasses import dataclass +from dataclasses import dataclass, replace @dataclass class SerializationOptions: with_comments: bool = True with_meta: bool = False - + unwrap_dollar_string: bool = False + + def replace(self, **kwargs) -> "SerializationOptions": + return replace(self, **kwargs) + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): From 5a10fece33cf401c4e2b23a1655e983c3c708e55 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 23 Jul 2025 11:48:44 +0200 Subject: [PATCH 06/45] batch of changes --- hcl2/parser.py | 2 +- hcl2/reconstructor.py | 7 +- hcl2/rule_transformer/deserializer.py | 31 +++ hcl2/rule_transformer/hcl2.lark | 25 +- hcl2/rule_transformer/rules/abstract.py | 36 ++- 
hcl2/rule_transformer/rules/base.py | 61 +++-- hcl2/rule_transformer/rules/containers.py | 165 ++++++++++++-- .../rules/{expression.py => expressions.py} | 135 ++++++----- hcl2/rule_transformer/rules/functions.py | 104 +++++++++ hcl2/rule_transformer/rules/indexing.py | 215 ++++++++++++++++-- hcl2/rule_transformer/rules/literal_rules.py | 34 +-- hcl2/rule_transformer/rules/strings.py | 42 ++-- hcl2/rule_transformer/rules/token_sequence.py | 56 ----- hcl2/rule_transformer/rules/tokens.py | 99 ++++---- hcl2/rule_transformer/rules/whitespace.py | 4 +- hcl2/rule_transformer/transformer.py | 90 ++++++-- hcl2/rule_transformer/utils.py | 41 +++- 17 files changed, 835 insertions(+), 312 deletions(-) create mode 100644 hcl2/rule_transformer/deserializer.py rename hcl2/rule_transformer/rules/{expression.py => expressions.py} (53%) create mode 100644 hcl2/rule_transformer/rules/functions.py delete mode 100644 hcl2/rule_transformer/rules/token_sequence.py diff --git a/hcl2/parser.py b/hcl2/parser.py index a0c87e34..3e524736 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,7 +12,7 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "rule_transformer/hcl2.lark.lark", + "rule_transformer/hcl2.lark", parser="lalr", cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 7f957d7b..555edcf6 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -167,12 +167,17 @@ def _should_add_space(self, rule, current_terminal, is_block_label: bool = False if self._is_equals_sign(current_terminal): return True + if is_block_label: + pass + # print(rule, self._last_rule, current_terminal, self._last_terminal) + if is_block_label and isinstance(rule, Token) and rule.value == "string": if ( current_terminal == self._last_terminal == Terminal("DBLQUOTE") or current_terminal == Terminal("DBLQUOTE") - and 
self._last_terminal == Terminal("NAME") + and self._last_terminal == Terminal("IDENTIFIER") ): + # print("true") return True # if we're in a ternary or binary operator, add space around the operator diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py new file mode 100644 index 00000000..5bdcf775 --- /dev/null +++ b/hcl2/rule_transformer/deserializer.py @@ -0,0 +1,31 @@ +import json +from typing import Any, TextIO, List + +from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule +from hcl2.rule_transformer.utils import DeserializationOptions + + +class Deserializer: + def __init__(self, options=DeserializationOptions()): + self.options = options + + def load_python(self, value: Any) -> LarkElement: + pass + + def loads(self, value: str) -> LarkElement: + return self.load_python(json.loads(value)) + + def load(self, file: TextIO) -> LarkElement: + return self.loads(file.read()) + + def _deserialize(self, value: Any) -> LarkElement: + pass + + def _deserialize_dict(self, value: dict) -> LarkRule: + pass + + def _deserialize_list(self, value: List) -> LarkRule: + pass + + def _deserialize_expression(self, value: str) -> LarkRule: + pass diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index a7722118..3f8d913e 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -11,11 +11,9 @@ IN : "in" FOR : "for" FOR_EACH : "for_each" -// Identifiers and Names -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ -IDENTIFIER: NAME | IN | FOR | IF | FOR_EACH // Literals +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ DECIMAL : "0".."9" @@ -91,7 +89,8 @@ block : identifier (identifier | string)* new_line_or_comment? 
LBRACE body RBRAC new_line_or_comment: ( NL_OR_COMMENT )+ // Basic literals and identifiers -identifier: IDENTIFIER +identifier : NAME +keyword: IN | FOR | IF | FOR_EACH int_lit: INT_LITERAL float_lit: FLOAT_LITERAL string: DBLQUOTE string_part* DBLQUOTE @@ -118,21 +117,20 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | string | tuple | object - | function_call - | index_expr_term - | get_attr_expr_term | identifier - | provider_function_call + | function_call | heredoc_template | heredoc_template_trim + | index_expr_term + | get_attr_expr_term | attr_splat_expr_term | full_splat_expr_term | for_tuple_expr | for_object_expr // Collections -tuple : LSQB (new_line_or_comment* expression new_line_or_comment* COMMA)* (new_line_or_comment* expression)? new_line_or_comment* RSQB -object : LBRACE new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* RBRACE +tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB +object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? COMMA)) new_line_or_comment?)* RBRACE object_elem : object_elem_key ( EQ | COLON ) expression object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression object_elem_key_expression : LPAR expression RPAR @@ -143,9 +141,8 @@ heredoc_template : HEREDOC_TEMPLATE heredoc_template_trim : HEREDOC_TEMPLATE_TRIM // Functions -function_call : identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR -arguments : (expression (new_line_or_comment* COMMA new_line_or_comment* expression)* (COMMA | ELLIPSIS)? new_line_or_comment*) -provider_function_call: identifier COLONS identifier COLONS identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +function_call : identifier (COLONS identifier COLONS identifier)? 
LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) // Indexing and attribute access index_expr_term : expr_term index @@ -156,7 +153,7 @@ full_splat_expr_term : expr_term full_splat braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB short_index : DOT INT_LITERAL get_attr : DOT identifier -attr_splat : ATTR_SPLAT get_attr* +attr_splat : ATTR_SPLAT (get_attr | index)* full_splat : FULL_SPLAT_START (get_attr | index)* // For expressions diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index d3a3b634..e32d9ddb 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -2,15 +2,16 @@ from typing import Any, Union, List, Optional, Tuple, Callable from lark import Token, Tree +from lark.exceptions import VisitError from lark.tree import Meta -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class LarkElement(ABC): - @property + @staticmethod @abstractmethod - def lark_name(self) -> str: + def lark_name() -> str: raise NotImplementedError() def __init__(self, index: int = -1, parent: "LarkElement" = None): @@ -28,7 +29,9 @@ def to_lark(self) -> Any: raise NotImplementedError() @abstractmethod - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: raise NotImplementedError() @@ -37,11 +40,6 @@ def __init__(self, value: Union[str, int]): self._value = value super().__init__() - @property - @abstractmethod - def lark_name(self) -> str: - raise NotImplementedError() - @property @abstractmethod def serialize_conversion(self) -> Callable: @@ -51,27 +49,26 @@ def serialize_conversion(self) -> Callable: 
def value(self): return self._value - def serialize(self, options: SerializationOptions = SerializationOptions()): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.serialize_conversion(self.value) def to_lark(self) -> Token: - return Token(self.lark_name, self.value) + return Token(self.lark_name(), self.value) def __str__(self) -> str: return str(self._value) def __repr__(self) -> str: - return f"" + return f"" class LarkRule(LarkElement, ABC): - @property - @abstractmethod - def lark_name(self) -> str: - raise NotImplementedError() - @abstractmethod - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: raise NotImplementedError() @property @@ -94,7 +91,7 @@ def to_lark(self) -> Tree: result_children.append(child.to_lark()) - return Tree(self.lark_name, result_children, meta=self._meta) + return Tree(self.lark_name(), result_children, meta=self._meta) def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): super().__init__() @@ -103,6 +100,7 @@ def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): for index, child in enumerate(children): if child is not None: + print(child) child.set_index(index) child.set_parent(self) diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 6d0c4924..da74954b 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,34 +3,37 @@ from lark.tree import Meta +from hcl2.dict_transformer import START_LINE, END_LINE from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expression import Expression -from hcl2.rule_transformer.rules.tokens import IdentifierToken, EQ_TOKEN +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.tokens 
import NAME, EQ from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class AttributeRule(LarkRule): _children: Tuple[ - IdentifierToken, - EQ_TOKEN, - Expression, + NAME, + EQ, + ExpressionRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "attribute" @property - def identifier(self) -> IdentifierToken: + def identifier(self) -> NAME: return self._children[0] @property - def expression(self) -> Expression: + def expression(self) -> ExpressionRule: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return {self.identifier.serialize(options): self.expression.serialize(options)} @@ -44,11 +47,13 @@ class BodyRule(LarkRule): ] ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "body" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: blocks: List[BlockRule] = [] attributes: List[AttributeRule] = [] comments = [] @@ -99,11 +104,13 @@ class StartRule(LarkRule): def body(self) -> BodyRule: return self._children[0] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "start" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.body.serialize(options) @@ -118,12 +125,12 @@ def __init__(self, children, meta: Optional[Meta] = None): child for child in children if not isinstance(child, LarkToken) ] - @property - def lark_name(self) -> str: + @staticmethod + 
def lark_name() -> str: return "block" @property - def labels(self) -> List[IdentifierToken]: + def labels(self) -> List[NAME]: return list(filter(lambda label: label is not None, self._labels)) @property @@ -131,10 +138,18 @@ def body(self) -> BodyRule: return self._body def serialize( - self, options: SerializationOptions = SerializationOptions() - ) -> BodyRule: + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: result = self._body.serialize(options) labels = self._labels - for label in reversed(labels): + for label in reversed(labels[1:]): result = {label.serialize(options): result} + + result.update( + { + START_LINE: self._meta.line, + END_LINE: self._meta.end_line, + } + ) + return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index c39f3ba2..11ac0f5e 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -1,7 +1,8 @@ +import json from typing import Tuple, List, Optional, Union, Any from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, IntLitRule, @@ -9,36 +10,135 @@ ) from hcl2.rule_transformer.rules.strings import StringRule from hcl2.rule_transformer.rules.tokens import ( - COLON_TOKEN, - EQ_TOKEN, - LBRACE_TOKEN, - COMMA_TOKEN, - RBRACE_TOKEN, + COLON, + EQ, + LBRACE, + COMMA, + RBRACE, LSQB, RSQB, LPAR, RPAR, DOT, ) from hcl2.rule_transformer.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string + + +class TupleRule(InlineCommentMixIn): + + _children: Tuple[ + LSQB, + Optional[NewLineOrCommentRule], + Tuple[ + ExpressionRule, + 
Optional[NewLineOrCommentRule], + COMMA, + Optional[NewLineOrCommentRule], + ... + ], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[COMMA], + Optional[NewLineOrCommentRule], + RSQB, + ] + + @staticmethod + def lark_name() -> str: + return "tuple" + + @property + def elements(self) -> List[ExpressionRule]: + return [ + child for child in self.children[1:-1] if isinstance(child, ExpressionRule) + ] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + if not options.wrap_tuples: + return [element.serialize(options, context) for element in self.elements] + + with context.modify(inside_dollar_string=True): + result = f"[{", ".join( + str(element.serialize(options, context)) for element in self.elements + )}]" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result class ObjectElemKeyRule(LarkRule): - _children: Tuple[Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule]] + + key_T = Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule] + + _children: Tuple[key_T] @staticmethod def lark_name() -> str: return "object_elem_key" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self.children[0].serialize(options) + @property + def value(self) -> key_T: + return self._children[0] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + return self.value.serialize(options, context) + + +class ObjectElemKeyExpressionRule(LarkRule): + + _children: Tuple[ + LPAR, + ExpressionRule, + RPAR, + ] + + + @staticmethod + def lark_name() -> str: + return "object_elem_key_expression" + + @property + def expression(self) -> ExpressionRule: + return self._children[1] + + def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + with context.modify(inside_dollar_string=True): + result = f"({self.expression.serialize(options, context)})" + if not 
context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class ObjectElemKeyDotAccessor(LarkRule): + + _children: Tuple[ + IdentifierRule, + Tuple[ + IdentifierRule, + DOT, + ... + ] + ] + + @staticmethod + def lark_name() -> str: + return "object_elem_key_dot_accessor" + + @property + def identifiers(self) -> List[IdentifierRule]: + return [child for child in self._children if isinstance(child, IdentifierRule)] + + def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + return ".".join(identifier.serialize(options, context) for identifier in self.identifiers) class ObjectElemRule(LarkRule): _children: Tuple[ ObjectElemKeyRule, - Union[EQ_TOKEN, COLON_TOKEN], - Expression, + Union[EQ, COLON], + ExpressionRule, ] @staticmethod @@ -47,25 +147,31 @@ def lark_name() -> str: @property def key(self) -> ObjectElemKeyRule: - return self.children[0] + return self._children[0] @property def expression(self): - return self.children[2] + return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: return { - self.children[0].serialize(options): self.children[2].serialize(options) + self.key.serialize(options, context): self.expression.serialize(options, context) } class ObjectRule(InlineCommentMixIn): _children: Tuple[ - LBRACE_TOKEN, + LBRACE, Optional[NewLineOrCommentRule], - Tuple[Union[ObjectElemRule, Optional[COMMA_TOKEN], NewLineOrCommentRule], ...], - RBRACE_TOKEN, + Tuple[ + ObjectElemRule, + Optional[NewLineOrCommentRule], + Optional[COMMA], + Optional[NewLineOrCommentRule], + ... 
+ ], + RBRACE, ] @staticmethod @@ -78,8 +184,21 @@ def elements(self) -> List[ObjectElemRule]: child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) ] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - result = {} - for element in self.elements: - result.update(element.serialize()) + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + if not options.wrap_objects: + result = {} + for element in self.elements: + result.update(element.serialize(options, context)) + + return result + + with context.modify(inside_dollar_string=True): + result = f"{{{", ".join( + f"{element.key.serialize(options, context)} = {element.expression.serialize(options,context)}" + for element in self.elements + )}}}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expressions.py similarity index 53% rename from hcl2/rule_transformer/rules/expression.py rename to hcl2/rule_transformer/rules/expressions.py index 8a03f813..d89f3b3c 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -8,7 +8,7 @@ LarkToken, ) from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule -from hcl2.rule_transformer.rules.tokens import LPAR_TOKEN, RPAR_TOKEN, QMARK_TOKEN, COLON_TOKEN +from hcl2.rule_transformer.rules.tokens import LPAR, RPAR, QMARK, COLON from hcl2.rule_transformer.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, @@ -18,46 +18,46 @@ to_dollar_string, unwrap_dollar_string, SerializationOptions, + SerializationContext, ) -class Expression(InlineCommentMixIn, ABC): - @property - def lark_name(self) -> str: +class ExpressionRule(InlineCommentMixIn, ABC): + @staticmethod + def lark_name() -> str: return "expression" def __init__(self, children, meta: Optional[Meta] = None): 
super().__init__(children, meta) -class ExprTermRule(Expression): +class ExprTermRule(ExpressionRule): type_ = Tuple[ - Optional[LPAR_TOKEN], + Optional[LPAR], Optional[NewLineOrCommentRule], - Expression, + ExpressionRule, Optional[NewLineOrCommentRule], - Optional[RPAR_TOKEN], + Optional[RPAR], ] _children: type_ - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = False if ( isinstance(children[0], LarkToken) - and children[0].lark_name == "LPAR" + and children[0].lark_name() == "LPAR" and isinstance(children[-1], LarkToken) - and children[-1].lark_name == "RPAR" + and children[-1].lark_name() == "RPAR" ): self._parentheses = True else: children = [None, *children, None] - self._possibly_insert_null_comments(children, [1, 3]) super().__init__(children, meta) @@ -66,35 +66,37 @@ def parentheses(self) -> bool: return self._parentheses @property - def expression(self) -> Expression: + def expression(self) -> ExpressionRule: return self._children[2] - def serialize(self , unwrap: bool = False, options: SerializationOptions = SerializationOptions()) -> Any: - result = self.expression.serialize(options) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = self.expression.serialize(options, context) + if self.parentheses: result = wrap_into_parentheses(result) - result = to_dollar_string(result) - - if options.unwrap_dollar_string: - result = unwrap_dollar_string(result) + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class ConditionalRule(Expression): +class ConditionalRule(ExpressionRule): _children: Tuple[ - Expression, - QMARK_TOKEN, + ExpressionRule, + QMARK, Optional[NewLineOrCommentRule], - Expression, + ExpressionRule, Optional[NewLineOrCommentRule], - COLON_TOKEN, + COLON, Optional[NewLineOrCommentRule], - Expression, + 
ExpressionRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): @@ -102,25 +104,34 @@ def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) @property - def condition(self) -> Expression: + def condition(self) -> ExpressionRule: return self._children[0] @property - def if_true(self) -> Expression: + def if_true(self) -> ExpressionRule: return self._children[3] @property - def if_false(self) -> Expression: + def if_false(self) -> ExpressionRule: return self._children[7] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - options = options.replace(unwrap_dollar_string=True) - print(self.condition) - result = f"{self.condition.serialize(options)} ? {self.if_true.serialize(options)} : {self.if_false.serialize(options)}" - return to_dollar_string(result) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=False): + result = ( + f"{self.condition.serialize(options, context)} " + f"? 
{self.if_true.serialize(options, context)} " + f": {self.if_false.serialize(options, context)}" + ) + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class BinaryTermRule(Expression): + +class BinaryTermRule(ExpressionRule): _children: Tuple[ BinaryOperatorRule, @@ -128,8 +139,8 @@ class BinaryTermRule(Expression): ExprTermRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -144,19 +155,21 @@ def binary_operator(self) -> BinaryOperatorRule: def expr_term(self) -> ExprTermRule: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return f"{self.binary_operator.serialize(options)} {self.expr_term.serialize(options)}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return f"{self.binary_operator.serialize(options, context)} {self.expr_term.serialize(options, context)}" -class BinaryOpRule(Expression): +class BinaryOpRule(ExpressionRule): _children: Tuple[ ExprTermRule, BinaryTermRule, Optional[NewLineOrCommentRule], ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_op" @property @@ -167,24 +180,28 @@ def expr_term(self) -> ExprTermRule: def binary_term(self) -> BinaryTermRule: return self._children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - children_options = options.replace(unwrap_dollar_string=True) - lhs = self.expr_term.serialize(children_options) - operator = self.binary_term.binary_operator.serialize(children_options) - rhs = self.binary_term.expr_term.serialize(children_options) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + with context.modify(inside_dollar_string=True): + lhs = self.expr_term.serialize(options, context) + operator 
= self.binary_term.binary_operator.serialize(options, context) + rhs = self.binary_term.expr_term.serialize(options, context) result = f"{lhs} {operator} {rhs}" - if options.unwrap_dollar_string: - return result - return to_dollar_string(result) + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class UnaryOpRule(Expression): + +class UnaryOpRule(ExpressionRule): _children: Tuple[LarkToken, ExprTermRule] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "unary_op" @property @@ -195,5 +212,9 @@ def operator(self) -> str: def expr_term(self): return self._children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return to_dollar_string(f"{self.operator}{self.expr_term.serialize(options)}") + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return to_dollar_string( + f"{self.operator}{self.expr_term.serialize(options, context)}" + ) diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py new file mode 100644 index 00000000..412a1667 --- /dev/null +++ b/hcl2/rule_transformer/rules/functions.py @@ -0,0 +1,104 @@ +from functools import lru_cache +from typing import Any, Optional, Tuple, Union, List + +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR +from hcl2.rule_transformer.rules.whitespace import InlineCommentMixIn, NewLineOrCommentRule +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string + + +class ArgumentsRule(InlineCommentMixIn): + + _children: Tuple[ + ExpressionRule, + Tuple[ + Optional[NewLineOrCommentRule], + COMMA, + Optional[NewLineOrCommentRule], + ExpressionRule, + ... 
+ ], + Optional[Union[COMMA, ELLIPSIS]], + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "arguments" + + @property + @lru_cache(maxsize=None) + def has_ellipsis(self) -> bool: + for child in self._children[-2:]: + if isinstance(child, StringToken) and child.lark_name() == "ELLIPSIS": + return True + return False + + @property + def arguments(self) -> List[ExpressionRule]: + return [child for child in self._children if isinstance(child, ExpressionRule)] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + result = ", ".join([argument.serialize(options, context) for argument in self.arguments]) + if self.has_ellipsis: + result += " ..." + return result + + +class FunctionCallRule(InlineCommentMixIn): + + _children: Tuple[ + IdentifierRule, + Optional[IdentifierRule], + Optional[IdentifierRule], + LPAR, + Optional[NewLineOrCommentRule], + Optional[ArgumentsRule], + Optional[NewLineOrCommentRule], + RPAR, + ] + + @staticmethod + def lark_name() -> str: + return "function_call" + + @property + @lru_cache(maxsize=None) + def identifiers(self) -> List[IdentifierRule]: + return [child for child in self._children if isinstance(child, IdentifierRule)] + + @property + @lru_cache(maxsize=None) + def arguments(self) -> Optional[ArgumentsRule]: + for child in self._children[2:6]: + if isinstance(child, ArgumentsRule): + return child + + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + result = ( + f"{":".join(identifier.serialize(options, context) for identifier in self.identifiers)}" + f"({self.arguments.serialize(options, context) if self.arguments else ""})" + ) + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result + + +# class ProviderFunctionCallRule(FunctionCallRule): +# _children: Tuple[ +# IdentifierRule, +# IdentifierRule, +# IdentifierRule, +# LPAR, +# Optional[NewLineOrCommentRule], +# 
Optional[ArgumentsRule], +# Optional[NewLineOrCommentRule], +# RPAR, +# ] +# +# @staticmethod +# def lark_name() -> str: +# return "provider_function_call" diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py index ce23d040..7a9b53a5 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rule_transformer/rules/indexing.py @@ -1,59 +1,69 @@ -from typing import List, Optional, Tuple, Any +from typing import List, Optional, Tuple, Any, Union from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import ExprTermRule, Expression +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.tokens import ( - DOT_TOKEN, - IntToken, - LSQB_TOKEN, - RSQB_TOKEN, + DOT, + IntLiteral, + LSQB, + RSQB, + ATTR_SPLAT, ) from hcl2.rule_transformer.rules.whitespace import ( InlineCommentMixIn, NewLineOrCommentRule, ) -from hcl2.rule_transformer.utils import SerializationOptions, to_dollar_string +from hcl2.rule_transformer.utils import ( + SerializationOptions, + to_dollar_string, + SerializationContext, +) class ShortIndexRule(LarkRule): _children: Tuple[ - DOT_TOKEN, - IntToken, + DOT, + IntLiteral, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "short_index" @property def index(self): return self.children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return f".{self.index.serialize(options)}" -class SqbIndex(InlineCommentMixIn): +class SqbIndexRule(InlineCommentMixIn): _children: Tuple[ - LSQB_TOKEN, + LSQB, Optional[NewLineOrCommentRule], ExprTermRule, Optional[NewLineOrCommentRule], - RSQB_TOKEN, + RSQB, ] - @property - def lark_name(self) -> str: 
+ @staticmethod + def lark_name() -> str: return "braces_index" @property def index_expression(self): return self.children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return f"[{self.index_expression.serialize(options)}]" def __init__(self, children, meta: Optional[Meta] = None): @@ -61,15 +71,170 @@ def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) -class IndexExprTermRule(Expression): +class IndexExprTermRule(ExpressionRule): - _children: Tuple[ExprTermRule, SqbIndex] + _children: Tuple[ExprTermRule, SqbIndexRule] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "index_expr_term" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return to_dollar_string( - f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class GetAttrRule(LarkRule): + + _children: Tuple[ + DOT, + IdentifierRule, + ] + + @staticmethod + def lark_name() -> str: + return "get_attr" + + @property + def identifier(self) -> IdentifierRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return f".{self.identifier.serialize(options, context)}" + + +class GetAttrExprTermRule(ExpressionRule): + + _children: Tuple[ + ExprTermRule, + GetAttrRule, + ] + + @staticmethod + def lark_name() -> str: + return "get_attr_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + 
@property + def get_attr(self) -> GetAttrRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.get_attr.serialize(options, context)}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class AttrSplatRule(LarkRule): + _children: Tuple[ + ATTR_SPLAT, + Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], + ] + + @staticmethod + def lark_name() -> str: + return "attr_splat" + + @property + def get_attrs( + self, + ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + return self._children[1:] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return ".*" + "".join( + get_attr.serialize(options, context) for get_attr in self.get_attrs + ) + + +class AttrSplatExprTermRule(ExpressionRule): + + _children: Tuple[ExprTermRule, AttrSplatRule] + + @staticmethod + def lark_name() -> str: + return "attr_splat_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def attr_splat(self) -> AttrSplatRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class FullSplatRule(LarkRule): + _children: Tuple[ + ATTR_SPLAT, + Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], + ] + + @staticmethod + def lark_name() -> str: + return "full_splat" + + @property + def get_attrs( + self, + ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + return 
self._children[1:] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return "[*]" + "".join( + get_attr.serialize(options, context) for get_attr in self.get_attrs ) + + +class FullSplatExprTermRule(ExpressionRule): + _children: Tuple[ExprTermRule, FullSplatRule] + + @staticmethod + def lark_name() -> str: + return "full_splat_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def attr_splat(self) -> FullSplatRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py index 06ca99ae..db7e8289 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rule_transformer/rules/literal_rules.py @@ -2,7 +2,7 @@ from typing import Any, Tuple from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class TokenRule(LarkRule, ABC): @@ -13,35 +13,43 @@ class TokenRule(LarkRule, ABC): def token(self) -> LarkToken: return self._children[0] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.token.serialize() +class KeywordRule(TokenRule): + @staticmethod + def lark_name() -> str: + return "keyword" + + class IdentifierRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "identifier" class 
IntLitRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "int_lit" class FloatLitRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "float_lit" class StringPartRule(TokenRule): - @property - def lark_name(self) -> str: - return "string" + @staticmethod + def lark_name() -> str: + return "string_part" class BinaryOperatorRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_operator" diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index 0f53c55a..dc3b85b0 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -3,48 +3,56 @@ from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import Expression, ExprTermRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule from hcl2.rule_transformer.rules.literal_rules import StringPartRule from hcl2.rule_transformer.rules.tokens import ( - INTERP_START_TOKEN, - RBRACE_TOKEN, - DBLQUOTE_TOKEN, - STRING_CHARS_TOKEN, + INTERP_START, + RBRACE, + DBLQUOTE, + STRING_CHARS, +) +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, ) -from hcl2.rule_transformer.utils import SerializationOptions class StringRule(LarkRule): - _children: Tuple[DBLQUOTE_TOKEN, List[StringPartRule], DBLQUOTE_TOKEN] + _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "string" @property def string_parts(self): return self.children[1:-1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return '"' + 
"".join(part.serialize() for part in self.string_parts) + '"' class InterpolationRule(LarkRule): _children: Tuple[ - INTERP_START_TOKEN, - Expression, - RBRACE_TOKEN, + INTERP_START, + ExpressionRule, + RBRACE, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "interpolation" @property def expression(self): return self.children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return "${" + self.expression.serialize(options) + "}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return to_dollar_string(self.expression.serialize(options)) diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py deleted file mode 100644 index 66d780b3..00000000 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ /dev/null @@ -1,56 +0,0 @@ -# from abc import ABC -# from typing import Tuple, Any, List, Optional, Type -# -# from lark.tree import Meta -# -# from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -# from hcl2.rule_transformer.utils import SerializationOptions -# -# -# class TokenSequenceRule(LarkRule, ABC): -# -# _children: Tuple[TokenSequence] -# -# def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): -# children = [TokenSequence(children)] -# super().__init__(children, meta) -# -# def serialized_type(self) -> Type: -# return str -# -# def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: -# return self.serialized_type()(self._children[0].serialize(options)) -# -# -# class IdentifierRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# return "identifier" -# -# -# class IntLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# return "int_lit" -# -# def serialized_type(self) -> Type: -# return int -# -# -# class FloatLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() 
-> str: -# return "float_lit" -# -# def serialized_type(self) -> Type: -# return float -# -# -# class StringLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; -# # nevertheless, try to change it to a rule in the grammar -# return "STRING_LIT" -# -# diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 18e1ac07..7dd79f63 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -1,54 +1,67 @@ -from typing import Callable, Any +from functools import lru_cache +from typing import Callable, Any, Type from hcl2.rule_transformer.rules.abstract import LarkToken class StringToken(LarkToken): - def __init__(self, name: str, value: Any): + """ + Single run-time base class; every `StringToken["..."]` call returns a + cached subclass whose static `lark_name()` yields the given string. + """ + + @staticmethod + @lru_cache(maxsize=None) + def __build_subclass(name: str) -> Type["StringToken"]: + """Create a subclass with a constant `lark_name`.""" + return type( # type: ignore + f"{name}_TOKEN", + (StringToken,), + { + "__slots__": (), + "lark_name": staticmethod(lambda _n=name: _n), + }, + ) + + def __class_getitem__(cls, name: str) -> Type["StringToken"]: + if not isinstance(name, str): + raise TypeError("StringToken[...] 
expects a single str argument") + return cls.__build_subclass(name) + + def __init__(self, value: Any) -> None: super().__init__(value) - self._name = name @property - def lark_name(self) -> str: - return self._name - - @property - def serialize_conversion(self) -> Callable: - return str - - -# explicitly define various kinds of string-based tokens -STRING_CHARS_TOKEN = StringToken -ESCAPED_INTERPOLATION_TOKEN = StringToken -BINARY_OP_TOKEN = StringToken -EQ_TOKEN = StringToken -COLON_TOKEN = StringToken -LPAR_TOKEN = StringToken # ( -RPAR_TOKEN = StringToken # ) -LBRACE_TOKEN = StringToken # { -RBRACE_TOKEN = StringToken # } -DOT_TOKEN = StringToken -COMMA_TOKEN = StringToken -QMARK_TOKEN = StringToken -LSQB_TOKEN = StringToken # [ -RSQB_TOKEN = StringToken # ] -INTERP_START_TOKEN = StringToken # ${ -DBLQUOTE_TOKEN = StringToken # " - - -class IdentifierToken(LarkToken): - @property - def lark_name(self) -> str: - return "IDENTIFIER" - - @property - def serialize_conversion(self) -> Callable: + def serialize_conversion(self) -> Callable[[Any], str]: return str -class IntToken(LarkToken): - @property - def lark_name(self) -> str: +# explicitly define various kinds of string-based tokens for type hinting +NAME = StringToken["NAME"] +STRING_CHARS = StringToken["STRING_CHARS"] +ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] +BINARY_OP = StringToken["BINARY_OP"] +EQ = StringToken["EQ"] +COLON = StringToken["COLON"] +LPAR = StringToken["LPAR"] +RPAR = StringToken["RPAR"] +LBRACE = StringToken["LBRACE"] +RBRACE = StringToken["RBRACE"] +DOT = StringToken["DOT"] +COMMA = StringToken["COMMA"] +ELLIPSIS = StringToken["ELLIPSIS"] +QMARK = StringToken["QMARK"] +LSQB = StringToken["LSQB"] +RSQB = StringToken["RSQB"] +INTERP_START = StringToken["INTERP_START"] +DBLQUOTE = StringToken["DBLQUOTE"] +ATTR_SPLAT = StringToken["ATTR_SPLAT"] +FULL_SPLAT = StringToken["FULL_SPLAT"] + + +class IntLiteral(LarkToken): + @staticmethod + def lark_name() -> str: return 
"INT_LITERAL" @property @@ -56,9 +69,9 @@ def serialize_conversion(self) -> Callable: return int -class FloatToken(LarkToken): - @property - def lark_name(self) -> str: +class FloatLiteral(LarkToken): + @staticmethod + def lark_name() -> str: return "FLOAT_LITERAL" @property diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index 65d5dd9c..fa24355c 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -7,8 +7,8 @@ class NewLineOrCommentRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "new_line_or_comment" @classmethod diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 31e88d61..41e970d6 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -14,18 +14,28 @@ ObjectRule, ObjectElemRule, ObjectElemKeyRule, + TupleRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, ) -from hcl2.rule_transformer.rules.expression import ( +from hcl2.rule_transformer.rules.expressions import ( BinaryTermRule, UnaryOpRule, BinaryOpRule, ExprTermRule, ConditionalRule, ) +from hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule from hcl2.rule_transformer.rules.indexing import ( IndexExprTermRule, - SqbIndex, + SqbIndexRule, ShortIndexRule, + GetAttrRule, + GetAttrExprTermRule, + AttrSplatExprTermRule, + AttrSplatRule, + FullSplatRule, + FullSplatExprTermRule, ) from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, @@ -36,10 +46,10 @@ ) from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule from hcl2.rule_transformer.rules.tokens import ( - IdentifierToken, + NAME, + IntLiteral, + FloatLiteral, StringToken, - IntToken, - FloatToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -59,16 +69,16 @@ def __init__(self, discard_new_line_or_comments: 
bool = False): self.discard_new_line_or_comments = discard_new_line_or_comments def __default_token__(self, token: Token) -> StringToken: - return StringToken(token.type, token.value) + return StringToken[token.type](token.value) - def IDENTIFIER(self, token: Token) -> IdentifierToken: - return IdentifierToken(token.value) + def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: + return FloatLiteral(token.value) - def INT_LITERAL(self, token: Token) -> IntToken: - return IntToken(token.value) + def NAME(self, token: Token) -> NAME: + return NAME(token.value) - def FLOAT_LITERAL(self, token: Token) -> FloatToken: - return FloatToken(token.value) + def INT_LITERAL(self, token: Token) -> IntLiteral: + return IntLiteral(token.value) @v_args(meta=True) def start(self, meta: Meta, args) -> StartRule: @@ -140,6 +150,10 @@ def unary_op(self, meta: Meta, args) -> UnaryOpRule: def binary_op(self, meta: Meta, args) -> BinaryOpRule: return BinaryOpRule(args, meta) + @v_args(meta=True) + def tuple(self, meta: Meta, args) -> TupleRule: + return TupleRule(args, meta) + @v_args(meta=True) def object(self, meta: Meta, args) -> ObjectRule: return ObjectRule(args, meta) @@ -152,14 +166,62 @@ def object_elem(self, meta: Meta, args) -> ObjectElemRule: def object_elem_key(self, meta: Meta, args) -> ObjectElemKeyRule: return ObjectElemKeyRule(args, meta) + @v_args(meta=True) + def object_elem_key_expression( + self, meta: Meta, args + ) -> ObjectElemKeyExpressionRule: + return ObjectElemKeyExpressionRule(args, meta) + + @v_args(meta=True) + def object_elem_key_dot_accessor( + self, meta: Meta, args + ) -> ObjectElemKeyDotAccessor: + return ObjectElemKeyDotAccessor(args, meta) + + @v_args(meta=True) + def arguments(self, meta: Meta, args) -> ArgumentsRule: + return ArgumentsRule(args, meta) + + @v_args(meta=True) + def function_call(self, meta: Meta, args) -> FunctionCallRule: + return FunctionCallRule(args, meta) + + # @v_args(meta=True) + # def provider_function_call(self, meta: Meta, 
args) -> ProviderFunctionCallRule: + # return ProviderFunctionCallRule(args, meta) + @v_args(meta=True) def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: return IndexExprTermRule(args, meta) @v_args(meta=True) - def braces_index(self, meta: Meta, args) -> SqbIndex: - return SqbIndex(args, meta) + def braces_index(self, meta: Meta, args) -> SqbIndexRule: + return SqbIndexRule(args, meta) @v_args(meta=True) def short_index(self, meta: Meta, args) -> ShortIndexRule: return ShortIndexRule(args, meta) + + @v_args(meta=True) + def get_attr(self, meta: Meta, args) -> GetAttrRule: + return GetAttrRule(args, meta) + + @v_args(meta=True) + def get_attr_expr_term(self, meta: Meta, args) -> GetAttrExprTermRule: + return GetAttrExprTermRule(args, meta) + + @v_args(meta=True) + def attr_splat(self, meta: Meta, args) -> AttrSplatRule: + return AttrSplatRule(args, meta) + + @v_args(meta=True) + def attr_splat_expr_term(self, meta: Meta, args) -> AttrSplatExprTermRule: + return AttrSplatExprTermRule(args, meta) + + @v_args(meta=True) + def full_splat(self, meta: Meta, args) -> FullSplatRule: + return FullSplatRule(args, meta) + + @v_args(meta=True) + def full_splat_expr_term(self, meta: Meta, args) -> FullSplatExprTermRule: + return FullSplatExprTermRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 6a6ed661..8ffeab8b 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,15 +1,48 @@ +from contextlib import contextmanager from dataclasses import dataclass, replace +from typing import Generator @dataclass class SerializationOptions: with_comments: bool = True with_meta: bool = False - unwrap_dollar_string: bool = False - - def replace(self, **kwargs) -> "SerializationOptions": + wrap_objects: bool = False + wrap_tuples: bool = False + + +@dataclass +class DeserializationOptions: + pass + + +@dataclass +class SerializationContext: + inside_dollar_string: bool = False + + def replace(self, 
**kwargs) -> "SerializationContext": return replace(self, **kwargs) - + + @contextmanager + def copy(self, **kwargs) -> Generator["SerializationContext", None, None]: + """Context manager that yields a modified copy of the context""" + modified_context = self.replace(**kwargs) + yield modified_context + + @contextmanager + def modify(self, **kwargs): + original_values = {key: getattr(self, key) for key in kwargs} + + for key, value in kwargs.items(): + setattr(self, key, value) + + try: + yield + finally: + # Restore original values + for key, value in original_values.items(): + setattr(self, key, value) + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): From f0f6fc995624fc19878cfa86743aa899c7344b6c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Tue, 12 Aug 2025 14:48:52 +0200 Subject: [PATCH 07/45] add JSON -> LarkElement deserializer; batch of other changes --- hcl2/const.py | 1 + hcl2/rule_transformer/deserializer.py | 264 ++++++++++++++++++- hcl2/rule_transformer/rules/abstract.py | 3 +- hcl2/rule_transformer/rules/base.py | 23 +- hcl2/rule_transformer/rules/functions.py | 2 +- hcl2/rule_transformer/rules/literal_rules.py | 6 - hcl2/rule_transformer/rules/strings.py | 57 ++-- hcl2/rule_transformer/rules/tokens.py | 72 +++-- hcl2/rule_transformer/transformer.py | 7 +- hcl2/rule_transformer/utils.py | 1 + 10 files changed, 365 insertions(+), 71 deletions(-) diff --git a/hcl2/const.py b/hcl2/const.py index 1d46f35a..1bd4a4ce 100644 --- a/hcl2/const.py +++ b/hcl2/const.py @@ -2,3 +2,4 @@ START_LINE_KEY = "__start_line__" END_LINE_KEY = "__end_line__" +IS_BLOCK = "__is_block__" diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index 5bdcf775..7b834968 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,7 +1,54 @@ import json +from functools import lru_cache from typing import Any, TextIO, List +from regex import regex + +from hcl2 import parses +from 
hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule +from hcl2.rule_transformer.rules.base import ( + BlockRule, + AttributeRule, + BodyRule, + StartRule, +) +from hcl2.rule_transformer.rules.containers import ( + TupleRule, + ObjectRule, + ObjectElemRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, + ObjectElemKeyRule, +) +from hcl2.rule_transformer.rules.expressions import ExprTermRule +from hcl2.rule_transformer.rules.literal_rules import ( + IdentifierRule, + IntLitRule, + FloatLitRule, +) +from hcl2.rule_transformer.rules.strings import ( + StringRule, + InterpolationRule, + StringPartRule, +) +from hcl2.rule_transformer.rules.tokens import ( + NAME, + EQ, + DBLQUOTE, + STRING_CHARS, + ESCAPED_INTERPOLATION, + INTERP_START, + RBRACE, + IntLiteral, + FloatLiteral, + RSQB, + LSQB, + COMMA, + DOT, + LBRACE, +) +from hcl2.rule_transformer.transformer import RuleTransformer from hcl2.rule_transformer.utils import DeserializationOptions @@ -9,8 +56,13 @@ class Deserializer: def __init__(self, options=DeserializationOptions()): self.options = options + @property + @lru_cache + def _transformer(self) -> RuleTransformer: + return RuleTransformer() + def load_python(self, value: Any) -> LarkElement: - pass + return StartRule([self._deserialize(value)]) def loads(self, value: str) -> LarkElement: return self.load_python(json.loads(value)) @@ -19,13 +71,209 @@ def load(self, file: TextIO) -> LarkElement: return self.loads(file.read()) def _deserialize(self, value: Any) -> LarkElement: - pass + if isinstance(value, dict): + if self._contains_block_marker(value): + elements = self._deserialize_block_elements(value) + return BodyRule(elements) + + return self._deserialize_object(value) + + if isinstance(value, list): + return self._deserialize_list(value) + + return self._deserialize_text(value) + + def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: + children = [] + + for key, value in 
value.items(): + if self._is_block(value): + # this value is a list of blocks, iterate over each block and deserialize them + for block in value: + children.append(self._deserialize_block(key, block)) + else: + + # otherwise it's just an attribute + if key != IS_BLOCK: + children.append(self._deserialize_attribute(key, value)) + + return children + + def _deserialize_text(self, value) -> LarkRule: + try: + int_val = int(value) + return IntLitRule([IntLiteral(int_val)]) + except ValueError: + pass + + try: + float_val = float(value) + return FloatLitRule([FloatLiteral(float_val)]) + except ValueError: + pass + + if isinstance(value, str): + if value.startswith('"') and value.endswith('"'): + return self._deserialize_string(value) + + if self._is_expression(value): + return self._deserialize_expression(value) + + return self._deserialize_identifier(value) + + elif isinstance(value, bool): + return self._deserialize_identifier(str(value).lower()) + + return self._deserialize_identifier(str(value)) + + def _deserialize_identifier(self, value: str) -> IdentifierRule: + return IdentifierRule([NAME(value)]) + + def _deserialize_string(self, value: str) -> StringRule: + result = [] + + pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") + parts = [part for part in pattern.split(value) if part != ""] + # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] + # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] + + for part in parts: + if part == '"': + continue + + if part.startswith('"'): + part = part[1:] + if part.endswith('"'): + part = part[:-1] + + e = self._deserialize_string_part(part) + result.append(e) + + return StringRule([DBLQUOTE(), *result, DBLQUOTE()]) + + def _deserialize_string_part(self, value: str) -> StringPartRule: + if value.startswith("$${") and value.endswith("}"): + return StringPartRule([ESCAPED_INTERPOLATION(value)]) + + if value.startswith("${") and value.endswith("}"): + return StringPartRule( + [ + InterpolationRule( + [INTERP_START(), self._deserialize_expression(value), RBRACE()] + ) + ] + ) + + return StringPartRule([STRING_CHARS(value)]) + + def _deserialize_expression(self, value: str) -> ExprTermRule: + """Deserialize an expression string into an ExprTermRule.""" + # instead of processing expression manually and trying to recognize what kind of expression it is, + # turn it into HCL2 code and parse it with lark: + + # unwrap from ${ and } + value = value[2:-1] + # create HCL2 snippet + value = f"temp = {value}" + # parse the above + parsed_tree = parses(value) + # transform parsed tree into LarkElement tree + rules_tree = self._transformer.transform(parsed_tree) + # extract expression from the tree + return rules_tree.body.children[0].expression + + def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: + """Deserialize a block by extracting labels and body""" + labels = [first_label] + body = value + + # Keep peeling off single-key layers until we hit the body (dict with IS_BLOCK) + while isinstance(body, dict) and not body.get(IS_BLOCK): + non_block_keys = [k for k in body.keys() if k != IS_BLOCK] + if len(non_block_keys) == 1: + # This is another label level + label = non_block_keys[0] + labels.append(label) + body = body[label] + else: + # 
Multiple keys = this is the body + break + + return BlockRule( + [*[self._deserialize(label) for label in labels], self._deserialize(body)] + ) + + def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule: + children = [ + self._deserialize_identifier(name), + EQ(), + ExprTermRule([self._deserialize(value)]), + ] + return AttributeRule(children) + + def _deserialize_list(self, value: List) -> TupleRule: + children = [] + for element in value: + deserialized = self._deserialize(element) + if not isinstance(deserialized, ExprTermRule): + # whatever an element of the list is, it has to be nested inside ExprTermRule + deserialized = ExprTermRule([deserialized]) + children.append(deserialized) + children.append(COMMA()) + + return TupleRule([LSQB(), *children, RSQB()]) + + def _deserialize_object(self, value: dict) -> ObjectRule: + children = [] + for key, value in value.items(): + children.append(self._deserialize_object_elem(key, value)) + return ObjectRule([LBRACE(), *children, RBRACE()]) + + def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: + if self._is_expression(key): + key = ObjectElemKeyExpressionRule([self._deserialize_expression(key)]) + elif "." 
in key: + parts = key.split(".") + children = [] + for part in parts: + children.append(self._deserialize_identifier(part)) + children.append(DOT()) + key = ObjectElemKeyDotAccessor(children[:-1]) # without the last comma + else: + key = self._deserialize_text(key) + + return ObjectElemRule( + [ + ObjectElemKeyRule([key]), + EQ(), + ExprTermRule([self._deserialize_text(value)]), + ] + ) + + def _is_expression(self, value: str) -> bool: + return value.startswith("${") and value.endswith("}") + + def _is_block(self, value: Any) -> bool: + """Simple check: if it's a list containing dicts with IS_BLOCK markers""" + if not isinstance(value, list) or len(value) == 0: + return False - def _deserialize_dict(self, value: dict) -> LarkRule: - pass + # Check if any item in the list has IS_BLOCK marker (directly or nested) + for item in value: + if isinstance(item, dict) and self._contains_block_marker(item): + return True - def _deserialize_list(self, value: List) -> LarkRule: - pass + return False - def _deserialize_expression(self, value: str) -> LarkRule: - pass + def _contains_block_marker(self, obj: dict) -> bool: + """Recursively check if a dict contains IS_BLOCK marker anywhere""" + if obj.get(IS_BLOCK): + return True + for value in obj.values(): + if isinstance(value, dict) and self._contains_block_marker(value): + return True + if isinstance(value, list): + for element in value: + if self._contains_block_marker(element): + return True + return False diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index e32d9ddb..33dcc9ca 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -36,7 +36,7 @@ def serialize( class LarkToken(LarkElement, ABC): - def __init__(self, value: Union[str, int]): + def __init__(self, value: Union[str, int, float]): self._value = value super().__init__() @@ -100,7 +100,6 @@ def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): for 
index, child in enumerate(children): if child is not None: - print(child) child.set_index(index) child.set_parent(self) diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index da74954b..5c8468d4 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,9 +3,11 @@ from lark.tree import Meta -from hcl2.dict_transformer import START_LINE, END_LINE +from hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.strings import StringRule from hcl2.rule_transformer.rules.tokens import NAME, EQ from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -42,7 +44,7 @@ class BodyRule(LarkRule): _children: List[ Union[ NewLineOrCommentRule, - # AttributeRule, + AttributeRule, "BlockRule", ] ] @@ -58,6 +60,7 @@ def serialize( attributes: List[AttributeRule] = [] comments = [] inline_comments = [] + for child in self._children: if isinstance(child, BlockRule): @@ -116,7 +119,11 @@ def serialize( class BlockRule(LarkRule): - _children: Tuple[BodyRule] + _children: Tuple[ + IdentifierRule, + Optional[Union[IdentifierRule, StringRule]], + BodyRule, + ] def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) @@ -141,15 +148,11 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: result = self._body.serialize(options) + if options.explicit_blocks: + result.update({IS_BLOCK: True}) + labels = self._labels for label in reversed(labels[1:]): result = {label.serialize(options): result} - result.update( - { - START_LINE: self._meta.line, - END_LINE: self._meta.end_line, - } - ) - return result diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 412a1667..54958514 
100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -78,7 +78,7 @@ def arguments(self) -> Optional[ArgumentsRule]: def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: result = ( - f"{":".join(identifier.serialize(options, context) for identifier in self.identifiers)}" + f"{"::".join(identifier.serialize(options, context) for identifier in self.identifiers)}" f"({self.arguments.serialize(options, context) if self.arguments else ""})" ) if not context.inside_dollar_string: diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py index db7e8289..baf8546f 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rule_transformer/rules/literal_rules.py @@ -43,12 +43,6 @@ def lark_name() -> str: return "float_lit" -class StringPartRule(TokenRule): - @staticmethod - def lark_name() -> str: - return "string_part" - - class BinaryOperatorRule(TokenRule): @staticmethod def lark_name() -> str: diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index dc3b85b0..769ad5b9 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -1,15 +1,13 @@ -from typing import Tuple, Optional, List, Any, Union - -from lark.tree import Meta +from typing import Tuple, List, Any, Union from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule -from hcl2.rule_transformer.rules.literal_rules import StringPartRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.tokens import ( INTERP_START, RBRACE, DBLQUOTE, STRING_CHARS, + ESCAPED_INTERPOLATION, ) from hcl2.rule_transformer.utils import ( SerializationOptions, @@ -18,41 +16,58 @@ ) -class StringRule(LarkRule): +class InterpolationRule(LarkRule): - _children: Tuple[DBLQUOTE, 
List[StringPartRule], DBLQUOTE] + _children: Tuple[ + INTERP_START, + ExpressionRule, + RBRACE, + ] @staticmethod def lark_name() -> str: - return "string" + return "interpolation" @property - def string_parts(self): - return self.children[1:-1] + def expression(self): + return self.children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + return to_dollar_string(self.expression.serialize(options)) -class InterpolationRule(LarkRule): +class StringPartRule(LarkRule): + _children: Tuple[Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]] - _children: Tuple[ - INTERP_START, - ExpressionRule, - RBRACE, - ] + @staticmethod + def lark_name() -> str: + return "string_part" + + @property + def content(self) -> Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]: + return self._children[0] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return self.content.serialize(options, context) + + +class StringRule(LarkRule): + + _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] @staticmethod def lark_name() -> str: - return "interpolation" + return "string" @property - def expression(self): - return self.children[1] + def string_parts(self): + return self.children[1:-1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return to_dollar_string(self.expression.serialize(options)) + return '"' + "".join(part.serialize() for part in self.string_parts) + '"' diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 7dd79f63..59e524f3 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -1,5 +1,5 @@ from functools import lru_cache -from typing import Callable, Any, Type +from typing import Callable, Any, Type, Optional, Tuple from 
hcl2.rule_transformer.rules.abstract import LarkToken @@ -10,9 +10,9 @@ class StringToken(LarkToken): cached subclass whose static `lark_name()` yields the given string. """ - @staticmethod + @classmethod @lru_cache(maxsize=None) - def __build_subclass(name: str) -> Type["StringToken"]: + def __build_subclass(cls, name: str) -> Type["StringToken"]: """Create a subclass with a constant `lark_name`.""" return type( # type: ignore f"{name}_TOKEN", @@ -28,7 +28,7 @@ def __class_getitem__(cls, name: str) -> Type["StringToken"]: raise TypeError("StringToken[...] expects a single str argument") return cls.__build_subclass(name) - def __init__(self, value: Any) -> None: + def __init__(self, value: Optional[Any] = None): super().__init__(value) @property @@ -36,27 +36,59 @@ def serialize_conversion(self) -> Callable[[Any], str]: return str +class StaticStringToken(LarkToken): + @classmethod + @lru_cache(maxsize=None) + def __build_subclass( + cls, name: str, default_value: str = None + ) -> Type["StringToken"]: + """Create a subclass with a constant `lark_name`.""" + + return type( # type: ignore + f"{name}_TOKEN", + (cls,), + { + "__slots__": (), + "lark_name": staticmethod(lambda _n=name: _n), + "_default_value": default_value, + }, + ) + + def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StringToken"]: + name, default_value = value + return cls.__build_subclass(name, default_value) + + def __init__(self): + super().__init__(getattr(self, "_default_value")) + + @property + def serialize_conversion(self) -> Callable[[Any], str]: + return str + + # explicitly define various kinds of string-based tokens for type hinting +# variable value NAME = StringToken["NAME"] STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] -EQ = StringToken["EQ"] -COLON = StringToken["COLON"] -LPAR = StringToken["LPAR"] -RPAR = StringToken["RPAR"] -LBRACE = StringToken["LBRACE"] -RBRACE = 
StringToken["RBRACE"] -DOT = StringToken["DOT"] -COMMA = StringToken["COMMA"] -ELLIPSIS = StringToken["ELLIPSIS"] -QMARK = StringToken["QMARK"] -LSQB = StringToken["LSQB"] -RSQB = StringToken["RSQB"] -INTERP_START = StringToken["INTERP_START"] -DBLQUOTE = StringToken["DBLQUOTE"] -ATTR_SPLAT = StringToken["ATTR_SPLAT"] -FULL_SPLAT = StringToken["FULL_SPLAT"] +# static value +EQ = StaticStringToken[("EQ", "=")] +COLON = StaticStringToken[("COLON", ":")] +LPAR = StaticStringToken[("LPAR", "(")] +RPAR = StaticStringToken[("RPAR", ")")] +LBRACE = StaticStringToken[("LBRACE", "{")] +RBRACE = StaticStringToken[("RBRACE", "}")] +DOT = StaticStringToken[("DOT", ".")] +COMMA = StaticStringToken[("COMMA", ",")] +ELLIPSIS = StaticStringToken[("ELLIPSIS", "...")] +QMARK = StaticStringToken[("QMARK", "?")] +LSQB = StaticStringToken[("LSQB", "[")] +RSQB = StaticStringToken[("RSQB", "]")] +INTERP_START = StaticStringToken[("INTERP_START", "${")] +DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] +ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] +FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] class IntLiteral(LarkToken): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 41e970d6..a7d91605 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -1,6 +1,4 @@ # pylint: disable=missing-function-docstring,unused-argument -from typing import List, Union - from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta @@ -42,9 +40,12 @@ IntLitRule, IdentifierRule, BinaryOperatorRule, +) +from hcl2.rule_transformer.rules.strings import ( + InterpolationRule, + StringRule, StringPartRule, ) -from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule from hcl2.rule_transformer.rules.tokens import ( NAME, IntLiteral, diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 8ffeab8b..404bdcdd 100644 --- 
a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -9,6 +9,7 @@ class SerializationOptions: with_meta: bool = False wrap_objects: bool = False wrap_tuples: bool = False + explicit_blocks: bool = True @dataclass From d8ac92d8f41de654218280aeb26f2cf4a45879f7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 27 Aug 2025 11:35:56 +0200 Subject: [PATCH 08/45] add heredoc rules and deserialization; require heredoc openers to be on their on separate line in lark grammar; whitespace trimming based on current implementation in dict_transformer.py; --- hcl2/rule_transformer/deserializer.py | 32 ++++++++--- hcl2/rule_transformer/hcl2.lark | 4 +- hcl2/rule_transformer/rules/strings.py | 73 ++++++++++++++++++++++++++ hcl2/rule_transformer/rules/tokens.py | 6 ++- hcl2/rule_transformer/transformer.py | 12 ++++- hcl2/rule_transformer/utils.py | 8 ++- 6 files changed, 123 insertions(+), 12 deletions(-) diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index 7b834968..a17a9510 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,6 +1,6 @@ import json from functools import lru_cache -from typing import Any, TextIO, List +from typing import Any, TextIO, List, Union from regex import regex @@ -31,6 +31,8 @@ StringRule, InterpolationRule, StringPartRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( NAME, @@ -47,9 +49,11 @@ COMMA, DOT, LBRACE, + HEREDOC_TRIM_TEMPLATE, + HEREDOC_TEMPLATE, ) from hcl2.rule_transformer.transformer import RuleTransformer -from hcl2.rule_transformer.utils import DeserializationOptions +from hcl2.rule_transformer.utils import DeserializationOptions, HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN class Deserializer: @@ -99,7 +103,7 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: return children - def _deserialize_text(self, value) -> LarkRule: + def _deserialize_text(self, 
value: Any) -> LarkRule: try: int_val = int(value) return IntLitRule([IntLiteral(int_val)]) @@ -114,6 +118,16 @@ def _deserialize_text(self, value) -> LarkRule: if isinstance(value, str): if value.startswith('"') and value.endswith('"'): + if not self.options.heredocs_to_strings and value.startswith('"<<-'): + match = HEREDOC_TRIM_PATTERN.match(value[1:-1]) + if match: + return self._deserialize_heredoc(value[1:-1], True) + + if not self.options.heredocs_to_strings and value.startswith('"<<'): + match = HEREDOC_PATTERN.match(value[1:-1]) + if match: + return self._deserialize_heredoc(value[1:-1], False) + return self._deserialize_string(value) if self._is_expression(value): @@ -131,11 +145,12 @@ def _deserialize_identifier(self, value: str) -> IdentifierRule: def _deserialize_string(self, value: str) -> StringRule: result = [] - - pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") - parts = [part for part in pattern.split(value) if part != ""] + # split string into individual parts based on lark grammar # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] + pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") + parts = [part for part in pattern.split(value) if part != ""] + for part in parts: if part == '"': @@ -166,6 +181,11 @@ def _deserialize_string_part(self, value: str) -> StringPartRule: return StringPartRule([STRING_CHARS(value)]) + def _deserialize_heredoc(self, value: str, trim: bool) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: + if trim: + return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)]) + return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) + def _deserialize_expression(self, value: str) -> ExprTermRule: """Deserialize an expression string into an ExprTermRule.""" # instead of processing expression manually and trying to recognize what kind of expression it is, diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index 3f8d913e..24140ada 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -67,8 +67,8 @@ ELLIPSIS : "..." 
COLONS: "::" // Heredocs -HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ -HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ +HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ // Ignore whitespace (but not newlines, as they're significant in HCL) %ignore /[ \t]+/ diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index 769ad5b9..4e28e976 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -1,3 +1,4 @@ +import sys from typing import Tuple, List, Any, Union from hcl2.rule_transformer.rules.abstract import LarkRule @@ -8,11 +9,15 @@ DBLQUOTE, STRING_CHARS, ESCAPED_INTERPOLATION, + HEREDOC_TEMPLATE, + HEREDOC_TRIM_TEMPLATE, ) from hcl2.rule_transformer.utils import ( SerializationOptions, SerializationContext, to_dollar_string, + HEREDOC_TRIM_PATTERN, + HEREDOC_PATTERN, ) @@ -71,3 +76,71 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + + +class HeredocTemplateRule(LarkRule): + + _children: Tuple[HEREDOC_TEMPLATE] + _trim_chars = "\n\t " + + + @staticmethod + def lark_name() -> str: + return "heredoc_template" + + @property + def heredoc(self): + return self.children[0] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + heredoc = self.heredoc.serialize(options, context) + + if not options.preserve_heredocs: + match = HEREDOC_PATTERN.match(heredoc) + if not match: + raise RuntimeError(f"Invalid Heredoc token: {heredoc}") + heredoc = match.group(2) + + result = heredoc.rstrip(self._trim_chars) + return f'"{result}"' + + +class HeredocTrimTemplateRule(HeredocTemplateRule): + + _children: 
Tuple[HEREDOC_TRIM_TEMPLATE] + + @staticmethod + def lark_name() -> str: + return "heredoc_trim_template" + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions + # This is a special version of heredocs that are declared with "<<-" + # This will calculate the minimum number of leading spaces in each line of a heredoc + # and then remove that number of spaces from each line + + heredoc = self.heredoc.serialize(options, context) + + if not options.preserve_heredocs: + match = HEREDOC_TRIM_PATTERN.match(heredoc) + if not match: + raise RuntimeError(f"Invalid Heredoc token: {heredoc}") + heredoc = match.group(2) + + heredoc = heredoc.rstrip(self._trim_chars) + lines = heredoc.split("\n") + + # calculate the min number of leading spaces in each line + min_spaces = sys.maxsize + for line in lines: + leading_spaces = len(line) - len(line.lstrip(" ")) + min_spaces = min(min_spaces, leading_spaces) + + # trim off that number of leading spaces from each line + lines = [line[min_spaces:] for line in lines] + return '"' + "\n".join(lines) + '"' + \ No newline at end of file diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 59e524f3..5b1959f3 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -67,12 +67,14 @@ def serialize_conversion(self) -> Callable[[Any], str]: # explicitly define various kinds of string-based tokens for type hinting -# variable value +# variable values NAME = StringToken["NAME"] STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] -# static value +HEREDOC_TEMPLATE = STRING_CHARS["HEREDOC_TEMPLATE"] +HEREDOC_TRIM_TEMPLATE = STRING_CHARS["HEREDOC_TRIM_TEMPLATE"] +# static values EQ = StaticStringToken[("EQ", "=")] COLON = 
StaticStringToken[("COLON", ":")] LPAR = StaticStringToken[("LPAR", "(")] diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index a7d91605..37ae445c 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -44,7 +44,9 @@ from hcl2.rule_transformer.rules.strings import ( InterpolationRule, StringRule, - StringPartRule, + StringPartRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( NAME, @@ -127,6 +129,14 @@ def string_part(self, meta: Meta, args) -> StringPartRule: def interpolation(self, meta: Meta, args) -> InterpolationRule: return InterpolationRule(args, meta) + @v_args(meta=True) + def heredoc_template(self, meta: Meta, args) -> HeredocTemplateRule: + return HeredocTemplateRule(args, meta) + + @v_args(meta=True) + def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: + return HeredocTrimTemplateRule(args, meta) + @v_args(meta=True) def expr_term(self, meta: Meta, args) -> ExprTermRule: return ExprTermRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 404bdcdd..98370ca3 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,7 +1,12 @@ +import re from contextlib import contextmanager from dataclasses import dataclass, replace from typing import Generator +HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) +HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) + + @dataclass class SerializationOptions: @@ -10,11 +15,12 @@ class SerializationOptions: wrap_objects: bool = False wrap_tuples: bool = False explicit_blocks: bool = True + preserve_heredocs: bool = True @dataclass class DeserializationOptions: - pass + heredocs_to_strings: bool = False @dataclass From 5932662bfe5045c2e944f7c9e3fc55c94077c4c9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 15 Sep 2025 
12:26:59 +0200 Subject: [PATCH 09/45] add `for` expressions rules --- .../rule_transformer/rules/for_expressions.py | 283 ++++++++++++++++++ hcl2/rule_transformer/rules/functions.py | 2 +- hcl2/rule_transformer/rules/tokens.py | 4 + hcl2/rule_transformer/transformer.py | 36 ++- 4 files changed, 320 insertions(+), 5 deletions(-) create mode 100644 hcl2/rule_transformer/rules/for_expressions.py diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rule_transformer/rules/for_expressions.py new file mode 100644 index 00000000..18abe6c8 --- /dev/null +++ b/hcl2/rule_transformer/rules/for_expressions.py @@ -0,0 +1,283 @@ +from typing import Any, Tuple, Optional, List + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.tokens import ( + LSQB, + RSQB, + LBRACE, + RBRACE, + FOR, + IN, + IF, + COMMA, + COLON, + ELLIPSIS, + FOR_OBJECT_ARROW, +) +from hcl2.rule_transformer.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) + + +class ForIntroRule(InlineCommentMixIn): + """Rule for the intro part of for expressions: 'for key, value in collection :'""" + + _children: Tuple[ + FOR, + Optional[NewLineOrCommentRule], + IdentifierRule, + Optional[COMMA], + Optional[IdentifierRule], + Optional[NewLineOrCommentRule], + IN, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + COLON, + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "for_intro" + + def __init__(self, children, meta: Optional[Meta] = None): + # Insert null comments at positions where they might be missing + self._possibly_insert_null_second_identifier(children) + 
self._possibly_insert_null_comments(children, [1, 5, 7, 9, 11]) + super().__init__(children, meta) + + def _possibly_insert_null_second_identifier(self, children: List[LarkRule]): + second_identifier_present = ( + len([child for child in children if isinstance(child, IdentifierRule)]) == 2 + ) + if not second_identifier_present: + children.insert(3, None) + children.insert(4, None) + + @property + def first_iterator(self) -> IdentifierRule: + """Returns the first iterator""" + return self._children[2] + + @property + def second_iterator(self) -> Optional[IdentifierRule]: + """Returns the second iterator or None if not present""" + return self._children[4] + + @property + def iterable(self) -> ExpressionRule: + """Returns the collection expression being iterated over""" + return self._children[8] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> str: + result = "for " + + result += f"{self.first_iterator.serialize(options, context)}" + if self.second_iterator: + result += f", {self.second_iterator.serialize(options, context)}" + + result += f" in {self.iterable.serialize(options, context)} : " + + return result + + +class ForCondRule(InlineCommentMixIn): + """Rule for the optional condition in for expressions: 'if condition'""" + + _children: Tuple[ + IF, + Optional[NewLineOrCommentRule], + ExpressionRule, # condition expression + ] + + @staticmethod + def lark_name() -> str: + return "for_cond" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1]) + super().__init__(children, meta) + + @property + def condition_expr(self) -> ExpressionRule: + """Returns the condition expression""" + return self._children[2] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> str: + return f"if {self.condition_expr.serialize(options, context)}" + + +class ForTupleExprRule(ExpressionRule): + """Rule for tuple/array for expressions: 
[for item in items : expression]""" + + _children: Tuple[ + LSQB, + Optional[NewLineOrCommentRule], + ForIntroRule, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[ForCondRule], + Optional[NewLineOrCommentRule], + RSQB, + ] + + @staticmethod + def lark_name() -> str: + return "for_tuple_expr" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 5, 7]) + self._possibly_insert_null_condition(children) + super().__init__(children, meta) + + def _possibly_insert_null_condition(self, children: List[LarkElement]): + if not len([child for child in children if isinstance(child, ForCondRule)]): + children.insert(6, None) + + @property + def for_intro(self) -> ForIntroRule: + """Returns the for intro rule""" + return self._children[2] + + @property + def value_expr(self) -> ExpressionRule: + """Returns the value expression""" + return self._children[4] + + @property + def condition(self) -> Optional[ForCondRule]: + """Returns the optional condition rule""" + return self._children[6] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + result = "[" + + with context.modify(inside_dollar_string=True): + result += self.for_intro.serialize(options, context) + result += self.value_expr.serialize(options, context) + + if self.condition is not None: + result += f" {self.condition.serialize(options, context)}" + + result += "]" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class ForObjectExprRule(ExpressionRule): + """Rule for object for expressions: {for key, value in items : key => value}""" + + _children: Tuple[ + LBRACE, + Optional[NewLineOrCommentRule], + ForIntroRule, + Optional[NewLineOrCommentRule], + ExpressionRule, + FOR_OBJECT_ARROW, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[ELLIPSIS], + 
Optional[NewLineOrCommentRule], + Optional[ForCondRule], + Optional[NewLineOrCommentRule], + RBRACE, + ] + + @staticmethod + def lark_name() -> str: + return "for_object_expr" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 6, 8, 10, 12]) + self._possibly_insert_null_optionals(children) + super().__init__(children, meta) + + def _possibly_insert_null_optionals(self, children: List[LarkElement]): + has_ellipsis = False + has_condition = False + + for child in children: + # if not has_ellipsis and isinstance(child, ELLIPSIS): + if ( + has_ellipsis is False + and child is not None + and child.lark_name() == ELLIPSIS.lark_name() + ): + has_ellipsis = True + if not has_condition and isinstance(child, ForCondRule): + has_condition = True + + if not has_ellipsis: + children.insert(9, None) + + if not has_condition: + children.insert(11, None) + + @property + def for_intro(self) -> ForIntroRule: + """Returns the for intro rule""" + return self._children[2] + + @property + def key_expr(self) -> ExpressionRule: + """Returns the key expression""" + return self._children[4] + + @property + def value_expr(self) -> ExpressionRule: + """Returns the value expression""" + return self._children[7] + + @property + def ellipsis(self) -> Optional[ELLIPSIS]: + """Returns the optional ellipsis token""" + return self._children[9] + + @property + def condition(self) -> Optional[ForCondRule]: + """Returns the optional condition rule""" + return self._children[11] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = "{" + with context.modify(inside_dollar_string=True): + result += self.for_intro.serialize(options, context) + result += f"{self.key_expr.serialize(options, context)} => " + + result += self.value_expr.serialize( + SerializationOptions(wrap_objects=True), context + ) + + if self.ellipsis is not None: + result += self.ellipsis.serialize(options, 
context) + + if self.condition is not None: + result += f" {self.condition.serialize(options, context)}" + + result += "}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 54958514..b25fed62 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -40,7 +40,7 @@ def arguments(self) -> List[ExpressionRule]: return [child for child in self._children if isinstance(child, ExpressionRule)] def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ", ".join([argument.serialize(options, context) for argument in self.arguments]) + result = ", ".join([str(argument.serialize(options, context)) for argument in self.arguments]) if self.has_ellipsis: result += " ..." return result diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 5b1959f3..67d53fcf 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -91,6 +91,10 @@ def serialize_conversion(self) -> Callable[[Any], str]: DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] +FOR = StaticStringToken[("FOR", "for")] +IN = StaticStringToken[("IN", "in")] +IF = StaticStringToken[("IF", "if")] +FOR_OBJECT_ARROW = StaticStringToken[("FOR_OBJECT_ARROW", "=>")] class IntLiteral(LarkToken): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 37ae445c..1ab1dfda 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -23,6 +23,12 @@ ExprTermRule, ConditionalRule, ) +from hcl2.rule_transformer.rules.for_expressions import ( + ForTupleExprRule, + ForObjectExprRule, + ForIntroRule, + ForCondRule, +) from 
hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule from hcl2.rule_transformer.rules.indexing import ( IndexExprTermRule, @@ -40,12 +46,13 @@ IntLitRule, IdentifierRule, BinaryOperatorRule, + KeywordRule, ) from hcl2.rule_transformer.rules.strings import ( InterpolationRule, StringRule, - StringPartRule, - HeredocTemplateRule, + StringPartRule, + HeredocTemplateRule, HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( @@ -72,6 +79,7 @@ def __init__(self, discard_new_line_or_comments: bool = False): self.discard_new_line_or_comments = discard_new_line_or_comments def __default_token__(self, token: Token) -> StringToken: + # TODO make this return StaticStringToken where applicable return StringToken[token.type](token.value) def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: @@ -109,6 +117,10 @@ def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: def identifier(self, meta: Meta, args) -> IdentifierRule: return IdentifierRule(args, meta) + @v_args(meta=True) + def keyword(self, meta: Meta, args) -> KeywordRule: + return KeywordRule(args, meta) + @v_args(meta=True) def int_lit(self, meta: Meta, args) -> IntLitRule: return IntLitRule(args, meta) @@ -132,11 +144,11 @@ def interpolation(self, meta: Meta, args) -> InterpolationRule: @v_args(meta=True) def heredoc_template(self, meta: Meta, args) -> HeredocTemplateRule: return HeredocTemplateRule(args, meta) - + @v_args(meta=True) def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: return HeredocTrimTemplateRule(args, meta) - + @v_args(meta=True) def expr_term(self, meta: Meta, args) -> ExprTermRule: return ExprTermRule(args, meta) @@ -236,3 +248,19 @@ def full_splat(self, meta: Meta, args) -> FullSplatRule: @v_args(meta=True) def full_splat_expr_term(self, meta: Meta, args) -> FullSplatExprTermRule: return FullSplatExprTermRule(args, meta) + + @v_args(meta=True) + def for_tuple_expr(self, meta: Meta, args) -> 
ForTupleExprRule: + return ForTupleExprRule(args, meta) + + @v_args(meta=True) + def for_object_expr(self, meta: Meta, args) -> ForObjectExprRule: + return ForObjectExprRule(args, meta) + + @v_args(meta=True) + def for_intro(self, meta: Meta, args) -> ForIntroRule: + return ForIntroRule(args, meta) + + @v_args(meta=True) + def for_cond(self, meta: Meta, args) -> ForCondRule: + return ForCondRule(args, meta) From 107fcb223f176793e04aa750f2c120cb38d00afa Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 29 Sep 2025 13:10:35 +0200 Subject: [PATCH 10/45] add Lark AST -> HCL2 reconstructor and LarkTree formatter; various other fixes and changes: * preserve order of serialized attributes and blocks * make RuleTransformer.__default_token__ differentiate between StringToken and StaticStringToken * add separate ProviderFunctionCallRule class for more accurate reconstruction --- hcl2/rule_transformer/deserializer.py | 120 +++++--- hcl2/rule_transformer/formatter.py | 262 ++++++++++++++++++ hcl2/rule_transformer/reconstructor.py | 204 ++++++++++++++ hcl2/rule_transformer/rules/abstract.py | 5 +- hcl2/rule_transformer/rules/base.py | 48 ++-- hcl2/rule_transformer/rules/containers.py | 69 +++-- hcl2/rule_transformer/rules/expressions.py | 9 +- .../rule_transformer/rules/for_expressions.py | 92 +++--- hcl2/rule_transformer/rules/functions.py | 63 +++-- hcl2/rule_transformer/rules/indexing.py | 2 +- hcl2/rule_transformer/rules/tokens.py | 12 +- hcl2/rule_transformer/rules/whitespace.py | 9 +- hcl2/rule_transformer/transformer.py | 3 + hcl2/rule_transformer/utils.py | 6 - 14 files changed, 738 insertions(+), 166 deletions(-) create mode 100644 hcl2/rule_transformer/formatter.py create mode 100644 hcl2/rule_transformer/reconstructor.py diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index a17a9510..56e1ad44 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,6 +1,8 @@ import json +from 
abc import ABC, abstractmethod +from dataclasses import dataclass from functools import lru_cache -from typing import Any, TextIO, List, Union +from typing import Any, TextIO, List, Union, Optional from regex import regex @@ -31,7 +33,7 @@ StringRule, InterpolationRule, StringPartRule, - HeredocTemplateRule, + HeredocTemplateRule, HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( @@ -51,14 +53,38 @@ LBRACE, HEREDOC_TRIM_TEMPLATE, HEREDOC_TEMPLATE, + COLON, ) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.transformer import RuleTransformer -from hcl2.rule_transformer.utils import DeserializationOptions, HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN +from hcl2.rule_transformer.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN -class Deserializer: - def __init__(self, options=DeserializationOptions()): - self.options = options +@dataclass +class DeserializerOptions: + heredocs_to_strings: bool = False + indent_length: int = 2 + object_elements_colon: bool = False + object_elements_trailing_comma: bool = True + + +class LarkElementTreeDeserializer(ABC): + def __init__(self, options: DeserializerOptions = None): + self.options = options or DeserializerOptions() + + @abstractmethod + def loads(self, value: str) -> LarkElement: + raise NotImplementedError() + + def load(self, file: TextIO) -> LarkElement: + return self.loads(file.read()) + + +class BaseDeserializer(LarkElementTreeDeserializer): + def __init__(self, options=None): + super().__init__(options) + self._current_line = 1 + self._last_new_line: Optional[NewLineOrCommentRule] = None @property @lru_cache @@ -66,19 +92,23 @@ def _transformer(self) -> RuleTransformer: return RuleTransformer() def load_python(self, value: Any) -> LarkElement: - return StartRule([self._deserialize(value)]) + result = StartRule([self._deserialize(value)]) + return result def loads(self, value: str) -> LarkElement: return self.load_python(json.loads(value)) - def 
load(self, file: TextIO) -> LarkElement: - return self.loads(file.read()) - def _deserialize(self, value: Any) -> LarkElement: if isinstance(value, dict): if self._contains_block_marker(value): - elements = self._deserialize_block_elements(value) - return BodyRule(elements) + + children = [] + + block_elements = self._deserialize_block_elements(value) + for element in block_elements: + children.append(element) + + return BodyRule(children) return self._deserialize_object(value) @@ -89,14 +119,13 @@ def _deserialize(self, value: Any) -> LarkElement: def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: children = [] - for key, value in value.items(): if self._is_block(value): # this value is a list of blocks, iterate over each block and deserialize them for block in value: children.append(self._deserialize_block(key, block)) - else: + else: # otherwise it's just an attribute if key != IS_BLOCK: children.append(self._deserialize_attribute(key, value)) @@ -106,28 +135,24 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: def _deserialize_text(self, value: Any) -> LarkRule: try: int_val = int(value) + if "." 
in str(value): + return FloatLitRule([FloatLiteral(float(value))]) return IntLitRule([IntLiteral(int_val)]) except ValueError: pass - try: - float_val = float(value) - return FloatLitRule([FloatLiteral(float_val)]) - except ValueError: - pass - if isinstance(value, str): if value.startswith('"') and value.endswith('"'): if not self.options.heredocs_to_strings and value.startswith('"<<-'): match = HEREDOC_TRIM_PATTERN.match(value[1:-1]) if match: return self._deserialize_heredoc(value[1:-1], True) - + if not self.options.heredocs_to_strings and value.startswith('"<<'): match = HEREDOC_PATTERN.match(value[1:-1]) if match: return self._deserialize_heredoc(value[1:-1], False) - + return self._deserialize_string(value) if self._is_expression(value): @@ -151,7 +176,6 @@ def _deserialize_string(self, value: str) -> StringRule: pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") parts = [part for part in pattern.split(value) if part != ""] - for part in parts: if part == '"': continue @@ -181,10 +205,12 @@ def _deserialize_string_part(self, value: str) -> StringPartRule: return StringPartRule([STRING_CHARS(value)]) - def _deserialize_heredoc(self, value: str, trim: bool) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: + def _deserialize_heredoc( + self, value: str, trim: bool + ) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: if trim: return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)]) - return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) + return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) def _deserialize_expression(self, value: str) -> ExprTermRule: """Deserialize an expression string into an ExprTermRule.""" @@ -200,7 +226,9 @@ def _deserialize_expression(self, value: str) -> ExprTermRule: # transform parsed tree into LarkElement tree rules_tree = self._transformer.transform(parsed_tree) # extract expression from the tree - return rules_tree.body.children[0].expression + result = rules_tree.body.children[0].expression + + return 
result def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: """Deserialize a block by extracting labels and body""" @@ -220,14 +248,24 @@ def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: break return BlockRule( - [*[self._deserialize(label) for label in labels], self._deserialize(body)] + [ + *[self._deserialize(label) for label in labels], + LBRACE(), + self._deserialize(body), + RBRACE(), + ] ) def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule: + expr_term = self._deserialize(value) + + if not isinstance(expr_term, ExprTermRule): + expr_term = ExprTermRule([expr_term]) + children = [ self._deserialize_identifier(name), EQ(), - ExprTermRule([self._deserialize(value)]), + expr_term, ] return AttributeRule(children) @@ -247,11 +285,21 @@ def _deserialize_object(self, value: dict) -> ObjectRule: children = [] for key, value in value.items(): children.append(self._deserialize_object_elem(key, value)) + + if self.options.object_elements_trailing_comma: + children.append(COMMA()) + return ObjectRule([LBRACE(), *children, RBRACE()]) def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: if self._is_expression(key): - key = ObjectElemKeyExpressionRule([self._deserialize_expression(key)]) + key = ObjectElemKeyExpressionRule( + [ + child + for child in self._deserialize_expression(key).children + if child is not None + ] + ) elif "." 
in key: parts = key.split(".") children = [] @@ -262,13 +310,13 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: else: key = self._deserialize_text(key) - return ObjectElemRule( - [ - ObjectElemKeyRule([key]), - EQ(), - ExprTermRule([self._deserialize_text(value)]), - ] - ) + result = [ + ObjectElemKeyRule([key]), + COLON() if self.options.object_elements_colon else EQ(), + ExprTermRule([self._deserialize(value)]), + ] + + return ObjectElemRule(result) def _is_expression(self, value: str) -> bool: return value.startswith("${") and value.endswith("}") diff --git a/hcl2/rule_transformer/formatter.py b/hcl2/rule_transformer/formatter.py new file mode 100644 index 00000000..ad0247dc --- /dev/null +++ b/hcl2/rule_transformer/formatter.py @@ -0,0 +1,262 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import List + +from hcl2.rule_transformer.rules.abstract import LarkElement +from hcl2.rule_transformer.rules.base import ( + StartRule, + BlockRule, + AttributeRule, + BodyRule, +) +from hcl2.rule_transformer.rules.containers import ObjectRule, ObjectElemRule, TupleRule +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rule_transformer.rules.for_expressions import ( + ForTupleExprRule, + ForObjectExprRule, +) +from hcl2.rule_transformer.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +@dataclass +class FormatterOptions: + indent_length: int = 2 + open_empty_blocks: bool = True + open_empty_objects: bool = True + open_empty_tuples: bool = False + + vertically_align_attributes: bool = True + vertically_align_object_elements: bool = True + + +class LarkElementTreeFormatter(ABC): + def __init__(self, options: FormatterOptions = None): + self.options = options or FormatterOptions() + + @abstractmethod + def format_tree(self, tree: LarkElement): + raise NotImplementedError() + + 
+class BaseFormatter(LarkElementTreeFormatter): + def __init__(self, options: FormatterOptions = None): + super().__init__(options) + self._current_line = 1 + self._current_indent_level = 0 + + def format_tree(self, tree: LarkElement): + if isinstance(tree, StartRule): + self.format_start_rule(tree) + + def format_start_rule(self, rule: StartRule): + self.format_body_rule(rule.body, 0) + # for child in rule.body.children: + # if isinstance(child, BlockRule): + # self.format_block_rule(child, 1) + + def format_block_rule(self, rule: BlockRule, indent_level: int = 0): + if self.options.vertically_align_attributes: + self._vertically_align_attributes_in_body(rule.body) + + self.format_body_rule(rule.body, indent_level) + if len(rule.body.children) > 0: + rule.children.insert(-1, self._build_newline(indent_level - 1)) + elif self.options.open_empty_blocks: + rule.children.insert(-1, self._build_newline(indent_level - 1, 2)) + + def format_body_rule(self, rule: BodyRule, indent_level: int = 0): + + in_start = isinstance(rule.parent, StartRule) + + new_children = [] + if not in_start: + new_children.append(self._build_newline(indent_level)) + + for i, child in enumerate(rule.children): + new_children.append(child) + + if isinstance(child, AttributeRule): + self.format_attribute_rule(child, indent_level) + new_children.append(self._build_newline(indent_level)) + + if isinstance(child, BlockRule): + self.format_block_rule(child, indent_level + 1) + + if i > 0: + new_children.insert(-2, self._build_newline(indent_level)) + new_children.append(self._build_newline(indent_level, 2)) + + new_children.pop(-1) + rule._children = new_children + + def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): + self.format_expression(rule.expression, indent_level + 1) + + def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): + if len(rule.elements) == 0: + if self.options.open_empty_tuples: + rule.children.insert(1, self._build_newline(indent_level - 1, 
2)) + return + + new_children = [] + for child in rule.children: + new_children.append(child) + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + if isinstance(child, (COMMA, LSQB)): + new_children.append(self._build_newline(indent_level)) + + self._deindent_last_line() + rule._children = new_children + + def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): + if len(rule.elements) == 0: + if self.options.open_empty_objects: + rule.children.insert(1, self._build_newline(indent_level - 1, 2)) + return + + new_children = [] + for i in range(len(rule.children)): + child = rule.children[i] + next_child = rule.children[i + 1] if i + 1 < len(rule.children) else None + new_children.append(child) + + if isinstance(child, LBRACE): + new_children.append(self._build_newline(indent_level)) + + if ( + next_child + and isinstance(next_child, ObjectElemRule) + and isinstance(child, (ObjectElemRule, COMMA)) + ): + new_children.append(self._build_newline(indent_level)) + + if isinstance(child, ObjectElemRule): + self.format_expression(child.expression, indent_level + 1) + + new_children.insert(-1, self._build_newline(indent_level)) + self._deindent_last_line() + + rule._children = new_children + + if self.options.vertically_align_object_elements: + self._vertically_align_object_elems(rule) + + def format_expression(self, rule: ExprTermRule, indent_level: int = 0): + if isinstance(rule.expression, ObjectRule): + self.format_object_rule(rule.expression, indent_level) + + elif isinstance(rule.expression, TupleRule): + self.format_tuple_rule(rule.expression, indent_level) + + elif isinstance(rule.expression, ForTupleExprRule): + self.format_fortupleexpr(rule.expression, indent_level) + + elif isinstance(rule.expression, ForObjectExprRule): + self.format_forobjectexpr(rule.expression, indent_level) + + elif isinstance(rule.expression, ExprTermRule): + self.format_expression(rule.expression) + + def format_fortupleexpr(self, 
expression: ForTupleExprRule, indent_level: int = 0): + for child in expression.children: + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + indexes = [1, 3, 5, 7] + for index in indexes: + expression.children[index] = self._build_newline(indent_level) + self._deindent_last_line() + # expression.children[8] = self._build_newline(indent_level - 1) + + def format_forobjectexpr( + self, expression: ForObjectExprRule, indent_level: int = 0 + ): + for child in expression.children: + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + indexes = [1, 3, 12] + for index in indexes: + expression.children[index] = self._build_newline(indent_level) + + self._deindent_last_line() + + def _vertically_align_attributes_in_body(self, body: BodyRule): + attributes_sequence: List[AttributeRule] = [] + + for child in body.children: + if isinstance(child, AttributeRule): + attributes_sequence.append(child) + + elif attributes_sequence: + max_length = max( + len(attribute.identifier.token.value) + for attribute in attributes_sequence + ) + for attribute in attributes_sequence: + name_length = len(attribute.identifier.token.value) + spaces_to_add = max_length - name_length + attribute.children[1].set_value( + " " * spaces_to_add + attribute.children[1].value + ) + attributes_sequence = [] + + def _vertically_align_object_elems(self, rule: ObjectRule): + max_length = max(len(elem.key.serialize()) for elem in rule.elements) + for elem in rule.elements: + key_length = len(elem.key.serialize()) + print(elem.key.serialize(), key_length) + + spaces_to_add = max_length - key_length + + separator = elem.children[1] + if isinstance(separator, COLON): + spaces_to_add += 1 + + elem.children[1].set_value(" " * spaces_to_add + separator.value) + + def _move_to_next_line(self, times: int = 1): + self._current_line += times + + def _increase_indent_level(self, times: int = 1): + self._current_indent_level += times + + def 
_decrease_indent_level(self, times: int = 1): + self._current_indent_level -= times + if self._current_indent_level < 0: + self._current_indent_level = 0 + + def _build_newline( + self, next_line_indent: int = 0, count: int = 1 + ) -> NewLineOrCommentRule: + result = NewLineOrCommentRule( + [ + NL_OR_COMMENT( + ("\n" * count) + " " * self.options.indent_length * next_line_indent + ) + ] + ) + self._last_new_line = result + return result + + def _deindent_last_line(self, times: int = 1): + token = self._last_new_line.token + for i in range(times): + if token.value.endswith(" " * self.options.indent_length): + token.set_value(token.value[: -self.options.indent_length]) + + # def _build_meta(self, indent_level: int = 0, length: int = 0) -> Meta: + # result = Meta() + # result.empty = length == 0 + # result.line = self._current_line + # result.column = indent_level * self.options.indent_length + # # result.start_pos = + # # result.end_line = + # # result.end_column = + # # result.end_pos = + # # result.orig_expansion = + # # result.match_tree = + # return result diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py new file mode 100644 index 00000000..7d316b2c --- /dev/null +++ b/hcl2/rule_transformer/reconstructor.py @@ -0,0 +1,204 @@ +from typing import List, Union + +from lark import Tree, Token +from hcl2.rule_transformer.rules import tokens +from hcl2.rule_transformer.rules.base import BlockRule +from hcl2.rule_transformer.rules.for_expressions import ForIntroRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.strings import StringRule +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ConditionalRule + + +class HCLReconstructor: + """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" + + def __init__(self): + self._reset_state() + + def _reset_state(self): + """State tracking for formatting decisions""" + 
self._last_was_space = True + self._current_indent = 0 + self._last_token_name = None + self._last_rule_name = None + self._in_parentheses = False + self._in_object = False + self._in_tuple = False + + def _should_add_space_before( + self, current_node: Union[Tree, Token], parent_rule_name: str = None + ) -> bool: + """Determine if we should add a space before the current token/rule.""" + + # Don't add space if we already have one + if self._last_was_space: + return False + + # Don't add space at the beginning + if self._last_token_name is None: + return False + + if isinstance(current_node, Token): + token_type = current_node.type + + # Space before '{' in blocks + if ( + token_type == tokens.LBRACE.lark_name() + and parent_rule_name == BlockRule.lark_name() + ): + return True + + # Space around Conditional Expression operators + if ( + parent_rule_name == ConditionalRule.lark_name() + and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + or self._last_token_name + in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + ): + return True + + # Space after + if ( + parent_rule_name == ForIntroRule.lark_name() + and token_type == tokens.COLON.lark_name() + ): + + return True + + # Space after commas in tuples and function arguments... + if self._last_token_name == tokens.COMMA.lark_name(): + # ... 
except for last comma + if token_type == tokens.RSQB.lark_name(): + return False + return True + + if token_type in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + tokens.ELLIPSIS.lark_name(), + ]: + return True + + if ( + self._last_token_name + in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + ] + and token_type != "NL_OR_COMMENT" + ): + return True + + # Space around for_object arrow + if tokens.FOR_OBJECT_ARROW.lark_name() in [ + token_type, + self._last_token_name, + ]: + return True + + # Space after ellipsis in function arguments + if self._last_token_name == tokens.ELLIPSIS.lark_name(): + return True + + if tokens.EQ.lark_name() in [token_type, self._last_token_name]: + return True + + # space around binary operators + if tokens.BINARY_OP.lark_name() in [token_type, self._last_token_name]: + return True + + elif isinstance(current_node, Tree): + rule_name = current_node.data + + if parent_rule_name == BlockRule.lark_name(): + # Add space between multiple string/identifier labels in blocks + if rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ] and self._last_rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ]: + return True + + return False + + def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: + """Recursively reconstruct a Tree node into HCL text fragments.""" + result = [] + rule_name = tree.data + + if rule_name == ExprTermRule.lark_name(): + # Check if parenthesized + if ( + len(tree.children) >= 3 + and isinstance(tree.children[0], Token) + and tree.children[0].type == tokens.LPAR.lark_name() + and isinstance(tree.children[-1], Token) + and tree.children[-1].type == tokens.RPAR.lark_name() + ): + self._in_parentheses = True + + for child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + self._in_parentheses = False + + else: + for child in tree.children: + 
result.extend(self._reconstruct_node(child, rule_name)) + + if self._should_add_space_before(tree, parent_rule_name): + result.insert(0, " ") + + # Update state tracking + self._last_rule_name = rule_name + if result: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: + """Reconstruct a Token node into HCL text fragments.""" + result = str(token.value) + if self._should_add_space_before(token, parent_rule_name): + result = " " + result + + self._last_token_name = token.type + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_node( + self, node: Union[Tree, Token], parent_rule_name: str = None + ) -> List[str]: + """Reconstruct any node (Tree or Token) into HCL text fragments.""" + if isinstance(node, Tree): + return self._reconstruct_tree(node, parent_rule_name) + elif isinstance(node, Token): + return [self._reconstruct_token(node, parent_rule_name)] + else: + # Fallback: convert to string + return [str(node)] + + def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: + """Convert a Lark.Tree AST back into a string representation of HCL.""" + # Reset state + self._reset_state() + + # Reconstruct the tree + fragments = self._reconstruct_node(tree) + + # Join fragments and apply post-processing + result = "".join(fragments) + + if postproc: + result = postproc(result) + + # Ensure file ends with newline + if result and not result.endswith("\n"): + result += "\n" + + return result diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 33dcc9ca..e83fed2b 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -49,6 +49,9 @@ def serialize_conversion(self) -> Callable: def value(self): return self._value + def set_value(self, value: Any): + self._value = value + 
def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: @@ -96,7 +99,7 @@ def to_lark(self) -> Tree: def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): super().__init__() self._children = children - self._meta = meta + self._meta = meta or Meta() for index, child in enumerate(children): if child is not None: diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 5c8468d4..c879b772 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -5,10 +5,10 @@ from hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.tokens import NAME, EQ +from hcl2.rule_transformer.rules.tokens import NAME, EQ, LBRACE, RBRACE from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext @@ -16,9 +16,9 @@ class AttributeRule(LarkRule): _children: Tuple[ - NAME, + IdentifierRule, EQ, - ExpressionRule, + ExprTermRule, ] @staticmethod @@ -26,11 +26,11 @@ def lark_name() -> str: return "attribute" @property - def identifier(self) -> NAME: + def identifier(self) -> IdentifierRule: return self._children[0] @property - def expression(self) -> ExpressionRule: + def expression(self) -> ExprTermRule: return self._children[2] def serialize( @@ -56,40 +56,32 @@ def lark_name() -> str: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - blocks: List[BlockRule] = [] - attributes: List[AttributeRule] = [] + attribute_names = set() comments = [] inline_comments = [] + result = 
defaultdict(list) + for child in self._children: if isinstance(child, BlockRule): - blocks.append(child) + name = child.labels[0].serialize(options) + if name in attribute_names: + raise RuntimeError(f"Attribute {name} is already defined.") + result[name].append(child.serialize(options)) if isinstance(child, AttributeRule): - attributes.append(child) - # collect in-line comments from attribute assignments, expressions etc - inline_comments.extend(child.expression.inline_comments()) + attribute_names.add(child) + result.update(child.serialize(options)) + if options.with_comments: + # collect in-line comments from attribute assignments, expressions etc + inline_comments.extend(child.expression.inline_comments()) - if isinstance(child, NewLineOrCommentRule): + if isinstance(child, NewLineOrCommentRule) and options.with_comments: child_comments = child.to_list() if child_comments: comments.extend(child_comments) - result = {} - - for attribute in attributes: - result.update(attribute.serialize(options)) - - result_blocks = defaultdict(list) - for block in blocks: - name = block.labels[0].serialize(options) - if name in result.keys(): - raise RuntimeError(f"Attribute {name} is already defined.") - result_blocks[name].append(block.serialize(options)) - - result.update(**result_blocks) - if options.with_comments: if comments: result["__comments__"] = comments @@ -122,7 +114,9 @@ class BlockRule(LarkRule): _children: Tuple[ IdentifierRule, Optional[Union[IdentifierRule, StringRule]], + LBRACE, BodyRule, + RBRACE, ] def __init__(self, children, meta: Optional[Meta] = None): diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index 11ac0f5e..b82abc58 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -14,13 +14,22 @@ EQ, LBRACE, COMMA, - RBRACE, LSQB, RSQB, LPAR, RPAR, DOT, + RBRACE, + LSQB, + RSQB, + LPAR, + RPAR, + DOT, ) from hcl2.rule_transformer.rules.whitespace import 
( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) class TupleRule(InlineCommentMixIn): @@ -33,7 +42,7 @@ class TupleRule(InlineCommentMixIn): Optional[NewLineOrCommentRule], COMMA, Optional[NewLineOrCommentRule], - ... + # ... ], ExpressionRule, Optional[NewLineOrCommentRule], @@ -52,14 +61,18 @@ def elements(self) -> List[ExpressionRule]: child for child in self.children[1:-1] if isinstance(child, ExpressionRule) ] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - if not options.wrap_tuples: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + if not options.wrap_tuples and not context.inside_dollar_string: return [element.serialize(options, context) for element in self.elements] with context.modify(inside_dollar_string=True): - result = f"[{", ".join( + result = "[" + result += ", ".join( str(element.serialize(options, context)) for element in self.elements - )}]" + ) + result += "]" if not context.inside_dollar_string: result = to_dollar_string(result) @@ -81,7 +94,9 @@ def lark_name() -> str: def value(self) -> key_T: return self._children[0] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.value.serialize(options, context) @@ -93,7 +108,6 @@ class ObjectElemKeyExpressionRule(LarkRule): RPAR, ] - @staticmethod def lark_name() -> str: return "object_elem_key_expression" @@ -102,7 +116,9 @@ def lark_name() -> str: def expression(self) -> ExpressionRule: return self._children[1] - def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + def serialize( + self, 
options=SerializationOptions(), context=SerializationContext() + ) -> Any: with context.modify(inside_dollar_string=True): result = f"({self.expression.serialize(options, context)})" if not context.inside_dollar_string: @@ -117,8 +133,7 @@ class ObjectElemKeyDotAccessor(LarkRule): Tuple[ IdentifierRule, DOT, - ... - ] + ], ] @staticmethod @@ -129,8 +144,12 @@ def lark_name() -> str: def identifiers(self) -> List[IdentifierRule]: return [child for child in self._children if isinstance(child, IdentifierRule)] - def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: - return ".".join(identifier.serialize(options, context) for identifier in self.identifiers) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return ".".join( + identifier.serialize(options, context) for identifier in self.identifiers + ) class ObjectElemRule(LarkRule): @@ -153,9 +172,13 @@ def key(self) -> ObjectElemKeyRule: def expression(self): return self._children[2] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return { - self.key.serialize(options, context): self.expression.serialize(options, context) + self.key.serialize(options, context): self.expression.serialize( + options, context + ) } @@ -169,7 +192,6 @@ class ObjectRule(InlineCommentMixIn): Optional[NewLineOrCommentRule], Optional[COMMA], Optional[NewLineOrCommentRule], - ... 
], RBRACE, ] @@ -184,8 +206,10 @@ def elements(self) -> List[ObjectElemRule]: child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) ] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - if not options.wrap_objects: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + if not options.wrap_objects and not context.inside_dollar_string: result = {} for element in self.elements: result.update(element.serialize(options, context)) @@ -193,12 +217,13 @@ def serialize(self, options = SerializationOptions(), context = SerializationCon return result with context.modify(inside_dollar_string=True): - result = f"{{{", ".join( + result = "{" + result += ", ".join( f"{element.key.serialize(options, context)} = {element.expression.serialize(options,context)}" for element in self.elements - )}}}" + ) + result += "}" if not context.inside_dollar_string: result = to_dollar_string(result) - return result diff --git a/hcl2/rule_transformer/rules/expressions.py b/hcl2/rule_transformer/rules/expressions.py index d89f3b3c..0e0c9be8 100644 --- a/hcl2/rule_transformer/rules/expressions.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -16,7 +16,6 @@ from hcl2.rule_transformer.utils import ( wrap_into_parentheses, to_dollar_string, - unwrap_dollar_string, SerializationOptions, SerializationContext, ) @@ -58,7 +57,7 @@ def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = True else: children = [None, *children, None] - self._possibly_insert_null_comments(children, [1, 3]) + self._insert_optionals(children, [1, 3]) super().__init__(children, meta) @property @@ -100,7 +99,7 @@ def lark_name() -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [2, 4, 6]) + self._insert_optionals(children, [2, 4, 6]) super().__init__(children, meta) @property @@ -118,7 +117,7 @@ def 
if_false(self) -> ExpressionRule: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - with context.modify(inside_dollar_string=False): + with context.modify(inside_dollar_string=True): result = ( f"{self.condition.serialize(options, context)} " f"? {self.if_true.serialize(options, context)} " @@ -144,7 +143,7 @@ def lark_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1]) + self._insert_optionals(children, [1]) super().__init__(children, meta) @property diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rule_transformer/rules/for_expressions.py index 18abe6c8..3a89aba3 100644 --- a/hcl2/rule_transformer/rules/for_expressions.py +++ b/hcl2/rule_transformer/rules/for_expressions.py @@ -52,18 +52,23 @@ def lark_name() -> str: return "for_intro" def __init__(self, children, meta: Optional[Meta] = None): - # Insert null comments at positions where they might be missing - self._possibly_insert_null_second_identifier(children) - self._possibly_insert_null_comments(children, [1, 5, 7, 9, 11]) + + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_second_identifier(self, children: List[LarkRule]): - second_identifier_present = ( - len([child for child in children if isinstance(child, IdentifierRule)]) == 2 - ) - if not second_identifier_present: - children.insert(3, None) - children.insert(4, None) + def _insert_optionals(self, children: List, indexes: List[int] = None): + identifiers = [child for child in children if isinstance(child, IdentifierRule)] + second_identifier = identifiers[1] if len(identifiers) == 2 else None + + indexes = [1, 5, 7, 9, 11] + if second_identifier is None: + indexes.extend([3, 4]) + + super()._insert_optionals(children, sorted(indexes)) + + if second_identifier is not None: + children[3] = COMMA() + children[4] = second_identifier @property def 
first_iterator(self) -> IdentifierRule: @@ -90,7 +95,6 @@ def serialize( result += f", {self.second_iterator.serialize(options, context)}" result += f" in {self.iterable.serialize(options, context)} : " - return result @@ -108,7 +112,7 @@ def lark_name() -> str: return "for_cond" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1]) + self._insert_optionals(children, [1]) super().__init__(children, meta) @property @@ -142,13 +146,25 @@ def lark_name() -> str: return "for_tuple_expr" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 5, 7]) - self._possibly_insert_null_condition(children) + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_condition(self, children: List[LarkElement]): - if not len([child for child in children if isinstance(child, ForCondRule)]): - children.insert(6, None) + def _insert_optionals(self, children: List, indexes: List[int] = None): + condition = None + + for child in children: + if isinstance(child, ForCondRule): + condition = child + break + + indexes = [1, 3, 5, 7] + + if condition is None: + indexes.append(6) + + super()._insert_optionals(children, sorted(indexes)) + + children[6] = condition @property def for_intro(self) -> ForIntroRule: @@ -209,30 +225,30 @@ def lark_name() -> str: return "for_object_expr" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 6, 8, 10, 12]) - self._possibly_insert_null_optionals(children) + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_optionals(self, children: List[LarkElement]): - has_ellipsis = False - has_condition = False + def _insert_optionals(self, children: List, indexes: List[int] = None): + ellipsis_ = None + condition = None for child in children: - # if not has_ellipsis and isinstance(child, ELLIPSIS): - if ( - 
has_ellipsis is False - and child is not None - and child.lark_name() == ELLIPSIS.lark_name() - ): - has_ellipsis = True - if not has_condition and isinstance(child, ForCondRule): - has_condition = True - - if not has_ellipsis: - children.insert(9, None) - - if not has_condition: - children.insert(11, None) + if ellipsis_ is None and isinstance(child, ELLIPSIS): + ellipsis_ = child + if condition is None and isinstance(child, ForCondRule): + condition = child + + indexes = [1, 3, 6, 8, 10, 12] + + if ellipsis_ is None: + indexes.append(9) + if condition is None: + indexes.append(11) + + super()._insert_optionals(children, sorted(indexes)) + + children[9] = ellipsis_ + children[11] = condition @property def for_intro(self) -> ForIntroRule: @@ -262,6 +278,7 @@ def condition(self) -> Optional[ForCondRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + result = "{" with context.modify(inside_dollar_string=True): result += self.for_intro.serialize(options, context) @@ -270,7 +287,6 @@ def serialize( result += self.value_expr.serialize( SerializationOptions(wrap_objects=True), context ) - if self.ellipsis is not None: result += self.ellipsis.serialize(options, context) diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index b25fed62..9e52a47b 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -4,8 +4,15 @@ from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR -from hcl2.rule_transformer.rules.whitespace import InlineCommentMixIn, NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string +from hcl2.rule_transformer.rules.whitespace import ( + InlineCommentMixIn, + NewLineOrCommentRule, +) +from 
hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) class ArgumentsRule(InlineCommentMixIn): @@ -17,7 +24,7 @@ class ArgumentsRule(InlineCommentMixIn): COMMA, Optional[NewLineOrCommentRule], ExpressionRule, - ... + # ... ], Optional[Union[COMMA, ELLIPSIS]], Optional[NewLineOrCommentRule], @@ -39,8 +46,12 @@ def has_ellipsis(self) -> bool: def arguments(self) -> List[ExpressionRule]: return [child for child in self._children if isinstance(child, ExpressionRule)] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ", ".join([str(argument.serialize(options, context)) for argument in self.arguments]) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = ", ".join( + [str(argument.serialize(options, context)) for argument in self.arguments] + ) if self.has_ellipsis: result += " ..." return result @@ -75,30 +86,32 @@ def arguments(self) -> Optional[ArgumentsRule]: if isinstance(child, ArgumentsRule): return child - - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ( - f"{"::".join(identifier.serialize(options, context) for identifier in self.identifiers)}" - f"({self.arguments.serialize(options, context) if self.arguments else ""})" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" + result += ( + f"({self.arguments.serialize(options, context) if self.arguments else ''})" ) + if not context.inside_dollar_string: result = to_dollar_string(result) return result -# class ProviderFunctionCallRule(FunctionCallRule): -# _children: Tuple[ -# IdentifierRule, -# IdentifierRule, -# IdentifierRule, -# LPAR, -# Optional[NewLineOrCommentRule], -# Optional[ArgumentsRule], -# Optional[NewLineOrCommentRule], -# 
RPAR, -# ] -# -# @staticmethod -# def lark_name() -> str: -# return "provider_function_call" +class ProviderFunctionCallRule(FunctionCallRule): + _children: Tuple[ + IdentifierRule, + IdentifierRule, + IdentifierRule, + LPAR, + Optional[NewLineOrCommentRule], + Optional[ArgumentsRule], + Optional[NewLineOrCommentRule], + RPAR, + ] + + @staticmethod + def lark_name() -> str: + return "provider_function_call" diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py index 7a9b53a5..20decf00 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rule_transformer/rules/indexing.py @@ -67,7 +67,7 @@ def serialize( return f"[{self.index_expression.serialize(options)}]" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3]) + self._insert_optionals(children, [1, 3]) super().__init__(children, meta) diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 67d53fcf..ba948d3e 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -37,6 +37,9 @@ def serialize_conversion(self) -> Callable[[Any], str]: class StaticStringToken(LarkToken): + + classes_by_value = {} + @classmethod @lru_cache(maxsize=None) def __build_subclass( @@ -44,7 +47,7 @@ def __build_subclass( ) -> Type["StringToken"]: """Create a subclass with a constant `lark_name`.""" - return type( # type: ignore + result = type( # type: ignore f"{name}_TOKEN", (cls,), { @@ -53,6 +56,8 @@ def __build_subclass( "_default_value": default_value, }, ) + cls.classes_by_value[default_value] = result + return result def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StringToken"]: name, default_value = value @@ -72,8 +77,9 @@ def serialize_conversion(self) -> Callable[[Any], str]: STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] 
-HEREDOC_TEMPLATE = STRING_CHARS["HEREDOC_TEMPLATE"] -HEREDOC_TRIM_TEMPLATE = STRING_CHARS["HEREDOC_TRIM_TEMPLATE"] +HEREDOC_TEMPLATE = StringToken["HEREDOC_TEMPLATE"] +HEREDOC_TRIM_TEMPLATE = StringToken["HEREDOC_TRIM_TEMPLATE"] +NL_OR_COMMENT = StringToken["NL_OR_COMMENT"] # static values EQ = StaticStringToken[("EQ", "=")] COLON = StaticStringToken[("COLON", ":")] diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index fa24355c..62069b78 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -3,7 +3,7 @@ from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule from hcl2.rule_transformer.rules.literal_rules import TokenRule -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class NewLineOrCommentRule(TokenRule): @@ -15,6 +15,11 @@ def lark_name() -> str: def from_string(cls, string: str) -> "NewLineOrCommentRule": return cls([LarkToken("NL_OR_COMMENT", string)]) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return self.token.serialize() + def to_list( self, options: SerializationOptions = SerializationOptions() ) -> Optional[List[str]]: @@ -43,7 +48,7 @@ def to_list( class InlineCommentMixIn(LarkRule, ABC): - def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + def _insert_optionals(self, children: List, indexes: List[int] = None): for index in indexes: try: child = children[index] diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 1ab1dfda..931eab8e 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -60,6 +60,7 @@ IntLiteral, FloatLiteral, StringToken, + StaticStringToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -80,6 +81,8 @@ def __init__(self, 
discard_new_line_or_comments: bool = False): def __default_token__(self, token: Token) -> StringToken: # TODO make this return StaticStringToken where applicable + if token.value in StaticStringToken.classes_by_value.keys(): + return StaticStringToken.classes_by_value[token.value]() return StringToken[token.type](token.value) def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 98370ca3..8f1d7352 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -7,7 +7,6 @@ HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) - @dataclass class SerializationOptions: with_comments: bool = True @@ -18,11 +17,6 @@ class SerializationOptions: preserve_heredocs: bool = True -@dataclass -class DeserializationOptions: - heredocs_to_strings: bool = False - - @dataclass class SerializationContext: inside_dollar_string: bool = False From 5ccfa657f28f152ea338c03d36508e365046c6f7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Fri, 12 Dec 2025 14:09:37 +0100 Subject: [PATCH 11/45] * HCLReconstructor._reconstruct_token - handle 0 length tokens * FunctionCallRule.serialize - properly serialize into dollar string * remove unused import --- hcl2/rule_transformer/reconstructor.py | 3 ++- hcl2/rule_transformer/rules/containers.py | 1 - hcl2/rule_transformer/rules/functions.py | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py index 7d316b2c..6aa8a4a4 100644 --- a/hcl2/rule_transformer/reconstructor.py +++ b/hcl2/rule_transformer/reconstructor.py @@ -167,7 +167,8 @@ def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: result = " " + result self._last_token_name = token.type - self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + if len(token) != 0: + self._last_was_space = result[-1].endswith(" 
") or result[-1].endswith("\n") return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index b82abc58..a2f53436 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -1,4 +1,3 @@ -import json from typing import Tuple, List, Optional, Union, Any from hcl2.rule_transformer.rules.abstract import LarkRule diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 9e52a47b..92cc8b11 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -89,10 +89,9 @@ def arguments(self) -> Optional[ArgumentsRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" - result += ( - f"({self.arguments.serialize(options, context) if self.arguments else ''})" - ) + with context.modify(inside_dollar_string=True): + result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" + result += f"({self.arguments.serialize(options, context) if self.arguments else ''})" if not context.inside_dollar_string: result = to_dollar_string(result) From ca192325cc03a72618773cf31199b53c27e24774 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 21 Feb 2026 14:33:09 +0100 Subject: [PATCH 12/45] fix operator precedence --- hcl2/rule_transformer/hcl2.lark | 60 +++++++++++++++++++--- hcl2/rule_transformer/reconstructor.py | 41 +++++++++++++-- hcl2/rule_transformer/rules/expressions.py | 55 +++++++++++++++++--- hcl2/rule_transformer/utils.py | 2 + 4 files changed, 138 insertions(+), 20 deletions(-) diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index 24140ada..63154efb 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -24,7 +24,6 @@ FLOAT_LITERAL: (NEGATIVE_DECIMAL? 
DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EX | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) // Operators -BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS DOUBLE_EQ : "==" NEQ : "!=" LT : "<" @@ -99,16 +98,61 @@ string_part: STRING_CHARS | interpolation // Expressions -?expression : expr_term | operation | conditional +?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -> conditional + | or_expr interpolation: INTERP_START expression RBRACE -conditional : expression QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -// Operations -?operation : unary_op | binary_op +// Operator precedence ladder (lowest to highest) +// Each level uses left recursion for left-associativity. +// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain +// transformer compatibility with BinaryOpRule / BinaryTermRule / BinaryOperatorRule. + +// Logical OR +?or_expr : or_expr or_binary_term new_line_or_comment? -> binary_op + | and_expr +or_binary_term : or_binary_operator new_line_or_comment? and_expr -> binary_term +!or_binary_operator : DOUBLE_PIPE -> binary_operator + +// Logical AND +?and_expr : and_expr and_binary_term new_line_or_comment? -> binary_op + | eq_expr +and_binary_term : and_binary_operator new_line_or_comment? eq_expr -> binary_term +!and_binary_operator : DOUBLE_AMP -> binary_operator + +// Equality +?eq_expr : eq_expr eq_binary_term new_line_or_comment? -> binary_op + | rel_expr +eq_binary_term : eq_binary_operator new_line_or_comment? rel_expr -> binary_term +!eq_binary_operator : DOUBLE_EQ -> binary_operator + | NEQ -> binary_operator + +// Relational +?rel_expr : rel_expr rel_binary_term new_line_or_comment? -> binary_op + | add_expr +rel_binary_term : rel_binary_operator new_line_or_comment? 
add_expr -> binary_term +!rel_binary_operator : LT -> binary_operator + | GT -> binary_operator + | LEQ -> binary_operator + | GEQ -> binary_operator + +// Additive +?add_expr : add_expr add_binary_term new_line_or_comment? -> binary_op + | mul_expr +add_binary_term : add_binary_operator new_line_or_comment? mul_expr -> binary_term +!add_binary_operator : PLUS -> binary_operator + | MINUS -> binary_operator + +// Multiplicative +?mul_expr : mul_expr mul_binary_term new_line_or_comment? -> binary_op + | unary_expr +mul_binary_term : mul_binary_operator new_line_or_comment? unary_expr -> binary_term +!mul_binary_operator : ASTERISK -> binary_operator + | SLASH -> binary_operator + | PERCENT -> binary_operator + +// Unary (highest precedence for operations) +?unary_expr : unary_op | expr_term !unary_op : (MINUS | NOT) expr_term -binary_op : expression binary_term new_line_or_comment? -binary_term : binary_operator new_line_or_comment? expression -!binary_operator : BINARY_OP // Expression terms expr_term : LPAR new_line_or_comment? expression new_line_or_comment? 
RPAR diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py index 6aa8a4a4..099beead 100644 --- a/hcl2/rule_transformer/reconstructor.py +++ b/hcl2/rule_transformer/reconstructor.py @@ -6,12 +6,32 @@ from hcl2.rule_transformer.rules.for_expressions import ForIntroRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.expressions import ExprTermRule, ConditionalRule +from hcl2.rule_transformer.rules.expressions import ( + ExprTermRule, + ConditionalRule, + UnaryOpRule, +) class HCLReconstructor: """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" + _binary_op_types = { + "DOUBLE_EQ", + "NEQ", + "LT", + "GT", + "LEQ", + "GEQ", + "MINUS", + "ASTERISK", + "SLASH", + "PERCENT", + "DOUBLE_AMP", + "DOUBLE_PIPE", + "PLUS", + } + def __init__(self): self._reset_state() @@ -105,8 +125,14 @@ def _should_add_space_before( if tokens.EQ.lark_name() in [token_type, self._last_token_name]: return True - # space around binary operators - if tokens.BINARY_OP.lark_name() in [token_type, self._last_token_name]: + # Don't add space around operator tokens inside unary_op + if parent_rule_name == UnaryOpRule.lark_name(): + return False + + if ( + token_type in self._binary_op_types + or self._last_token_name in self._binary_op_types + ): return True elif isinstance(current_node, Tree): @@ -130,7 +156,14 @@ def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[st result = [] rule_name = tree.data - if rule_name == ExprTermRule.lark_name(): + if rule_name == UnaryOpRule.lark_name(): + for i, child in enumerate(tree.children): + result.extend(self._reconstruct_node(child, rule_name)) + if i == 0: + # Suppress space between unary operator and its operand + self._last_was_space = True + + elif rule_name == ExprTermRule.lark_name(): # Check if parenthesized if ( 
len(tree.children) >= 3 diff --git a/hcl2/rule_transformer/rules/expressions.py b/hcl2/rule_transformer/rules/expressions.py index 0e0c9be8..db256e82 100644 --- a/hcl2/rule_transformer/rules/expressions.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -26,8 +26,30 @@ class ExpressionRule(InlineCommentMixIn, ABC): def lark_name() -> str: return "expression" - def __init__(self, children, meta: Optional[Meta] = None): + def __init__( + self, children, meta: Optional[Meta] = None, parentheses: bool = False + ): super().__init__(children, meta) + self._parentheses = parentheses + + def _wrap_into_parentheses( + self, value: str, options=SerializationOptions(), context=SerializationContext() + ) -> str: + # do not wrap into parentheses if + # 1. already wrapped or + # 2. is top-level expression (unless explicitly wrapped) + if context.inside_parentheses: + return value + # Look through ExprTermRule wrapper to determine if truly nested + parent = getattr(self, "parent", None) + if parent is None: + return value + if isinstance(parent, ExprTermRule): + if not isinstance(parent.parent, ExpressionRule): + return value + elif not isinstance(parent, ExpressionRule): + return value + return wrap_into_parentheses(value) class ExprTermRule(ExpressionRule): @@ -47,18 +69,18 @@ def lark_name() -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): - self._parentheses = False + parentheses = False if ( isinstance(children[0], LarkToken) and children[0].lark_name() == "LPAR" and isinstance(children[-1], LarkToken) and children[-1].lark_name() == "RPAR" ): - self._parentheses = True + parentheses = True else: children = [None, *children, None] self._insert_optionals(children, [1, 3]) - super().__init__(children, meta) + super().__init__(children, meta, parentheses) @property def parentheses(self) -> bool: @@ -71,7 +93,10 @@ def expression(self) -> ExpressionRule: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) 
-> Any: - result = self.expression.serialize(options, context) + with context.modify( + inside_parentheses=self.parentheses or context.inside_parentheses + ): + result = self.expression.serialize(options, context) if self.parentheses: result = wrap_into_parentheses(result) @@ -127,6 +152,9 @@ def serialize( if not context.inside_dollar_string: result = to_dollar_string(result) + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) + return result @@ -192,6 +220,9 @@ def serialize( if not context.inside_dollar_string: result = to_dollar_string(result) + + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) return result @@ -214,6 +245,14 @@ def expr_term(self): def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return to_dollar_string( - f"{self.operator}{self.expr_term.serialize(options, context)}" - ) + + with context.modify(inside_dollar_string=True): + result = f"{self.operator}{self.expr_term.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) + + return result diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 8f1d7352..68c32ebc 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -15,11 +15,13 @@ class SerializationOptions: wrap_tuples: bool = False explicit_blocks: bool = True preserve_heredocs: bool = True + force_operation_parentheses: bool = False @dataclass class SerializationContext: inside_dollar_string: bool = False + inside_parentheses: bool = False def replace(self, **kwargs) -> "SerializationContext": return replace(self, **kwargs) From fc49bad9b819f5ce89ea5ed876880248c4f621b9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 16:20:06 +0100 Subject: [PATCH 13/45] 
reorganize new and old code --- hcl2/__init__.py | 1 - hcl2/api.py | 21 +- hcl2/{rule_transformer => }/deserializer.py | 20 +- hcl2/dict_transformer.py | 403 -------- hcl2/{rule_transformer => }/editor.py | 0 hcl2/{rule_transformer => }/formatter.py | 14 +- hcl2/hcl2.lark | 207 ++-- hcl2/parser.py | 24 +- hcl2/{rule_transformer => }/processor.py | 0 hcl2/py.typed | 0 hcl2/reconstructor.py | 913 ++++-------------- hcl2/rule_transformer/hcl2.lark | 207 ---- hcl2/rule_transformer/json.py | 12 - hcl2/rule_transformer/reconstructor.py | 238 ----- hcl2/rule_transformer/rules/__init__.py | 0 hcl2/{rule_transformer => rules}/__init__.py | 0 hcl2/{rule_transformer => }/rules/abstract.py | 2 +- hcl2/{rule_transformer => }/rules/base.py | 16 +- .../rules/containers.py | 14 +- .../rules/expressions.py | 10 +- .../rules/for_expressions.py | 12 +- .../{rule_transformer => }/rules/functions.py | 10 +- hcl2/{rule_transformer => }/rules/indexing.py | 12 +- .../rules/literal_rules.py | 4 +- hcl2/{rule_transformer => }/rules/strings.py | 8 +- hcl2/{rule_transformer => }/rules/tokens.py | 2 +- hcl2/{rule_transformer => }/rules/tree.py | 0 .../rules/whitespace.py | 6 +- hcl2/{rule_transformer => }/transformer.py | 20 +- hcl2/{rule_transformer => }/utils.py | 0 30 files changed, 442 insertions(+), 1734 deletions(-) rename hcl2/{rule_transformer => }/deserializer.py (94%) delete mode 100644 hcl2/dict_transformer.py rename hcl2/{rule_transformer => }/editor.py (100%) rename hcl2/{rule_transformer => }/formatter.py (94%) rename hcl2/{rule_transformer => }/processor.py (100%) delete mode 100644 hcl2/py.typed delete mode 100644 hcl2/rule_transformer/hcl2.lark delete mode 100644 hcl2/rule_transformer/json.py delete mode 100644 hcl2/rule_transformer/reconstructor.py delete mode 100644 hcl2/rule_transformer/rules/__init__.py rename hcl2/{rule_transformer => rules}/__init__.py (100%) rename hcl2/{rule_transformer => }/rules/abstract.py (97%) rename hcl2/{rule_transformer => }/rules/base.py (88%) 
rename hcl2/{rule_transformer => }/rules/containers.py (93%) rename hcl2/{rule_transformer => }/rules/expressions.py (95%) rename hcl2/{rule_transformer => }/rules/for_expressions.py (95%) rename hcl2/{rule_transformer => }/rules/functions.py (90%) rename hcl2/{rule_transformer => }/rules/indexing.py (94%) rename hcl2/{rule_transformer => }/rules/literal_rules.py (85%) rename hcl2/{rule_transformer => }/rules/strings.py (94%) rename hcl2/{rule_transformer => }/rules/tokens.py (98%) rename hcl2/{rule_transformer => }/rules/tree.py (100%) rename hcl2/{rule_transformer => }/rules/whitespace.py (90%) rename hcl2/{rule_transformer => }/transformer.py (93%) rename hcl2/{rule_transformer => }/utils.py (100%) diff --git a/hcl2/__init__.py b/hcl2/__init__.py index 62f5a198..2d5dad09 100644 --- a/hcl2/__init__.py +++ b/hcl2/__init__.py @@ -11,7 +11,6 @@ parse, parses, transform, - reverse_transform, writes, ) diff --git a/hcl2/api.py b/hcl2/api.py index 1cec02a2..7c384c53 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -2,9 +2,9 @@ from typing import TextIO from lark.tree import Tree -from hcl2.parser import parser, reconstruction_parser -from hcl2.dict_transformer import DictTransformer -from hcl2.reconstructor import HCLReconstructor, HCLReverseTransformer +from hcl2.parser import parser +from hcl2.reconstructor import HCLReconstructor +from hcl2.transformer import RuleTransformer def load(file: TextIO, with_meta=False) -> dict: @@ -27,7 +27,7 @@ def loads(text: str, with_meta=False) -> dict: # This means that all blocks must end in a new line even if the file ends # Append a new line as a temporary fix tree = parser().parse(text + "\n") - return DictTransformer(with_meta=with_meta).transform(tree) + return RuleTransformer().transform(tree) def parse(file: TextIO) -> Tree: @@ -41,7 +41,7 @@ def parses(text: str) -> Tree: """Load HCL2 syntax tree from a string. :param text: Text with hcl2 to be loaded as a dict. 
""" - return reconstruction_parser().parse(text) + return parser().parse(text) def transform(ast: Tree, with_meta=False) -> dict: @@ -50,18 +50,11 @@ def transform(ast: Tree, with_meta=False) -> dict: :param with_meta: If set to true then adds `__start_line__` and `__end_line__` parameters to the output dict. Default to false. """ - return DictTransformer(with_meta=with_meta).transform(ast) - - -def reverse_transform(hcl2_dict: dict) -> Tree: - """Convert a dictionary to an HCL2 AST. - :param hcl2_dict: a dictionary produced by `load` or `transform` - """ - return HCLReverseTransformer().transform(hcl2_dict) + return RuleTransformer().transform(ast) def writes(ast: Tree) -> str: """Convert an HCL2 syntax tree to a string. :param ast: HCL2 syntax tree, output from `parse` or `parses` """ - return HCLReconstructor(reconstruction_parser()).reconstruct(ast) + return HCLReconstructor().reconstruct(ast) diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/deserializer.py similarity index 94% rename from hcl2/rule_transformer/deserializer.py rename to hcl2/deserializer.py index 56e1ad44..2290809c 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/deserializer.py @@ -8,14 +8,14 @@ from hcl2 import parses from hcl2.const import IS_BLOCK -from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule -from hcl2.rule_transformer.rules.base import ( +from hcl2.rules.abstract import LarkElement, LarkRule +from hcl2.rules.base import ( BlockRule, AttributeRule, BodyRule, StartRule, ) -from hcl2.rule_transformer.rules.containers import ( +from hcl2.rules.containers import ( TupleRule, ObjectRule, ObjectElemRule, @@ -23,20 +23,20 @@ ObjectElemKeyDotAccessor, ObjectElemKeyRule, ) -from hcl2.rule_transformer.rules.expressions import ExprTermRule -from hcl2.rule_transformer.rules.literal_rules import ( +from hcl2.rules.expressions import ExprTermRule +from hcl2.rules.literal_rules import ( IdentifierRule, IntLitRule, FloatLitRule, ) -from 
hcl2.rule_transformer.rules.strings import ( +from hcl2.rules.strings import ( StringRule, InterpolationRule, StringPartRule, HeredocTemplateRule, HeredocTrimTemplateRule, ) -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.tokens import ( NAME, EQ, DBLQUOTE, @@ -55,9 +55,9 @@ HEREDOC_TEMPLATE, COLON, ) -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.transformer import RuleTransformer -from hcl2.rule_transformer.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.transformer import RuleTransformer +from hcl2.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN @dataclass diff --git a/hcl2/dict_transformer.py b/hcl2/dict_transformer.py deleted file mode 100644 index 64c58bcb..00000000 --- a/hcl2/dict_transformer.py +++ /dev/null @@ -1,403 +0,0 @@ -"""A Lark Transformer for transforming a Lark parse tree into a Python dict""" -import json -import re -import sys -from collections import namedtuple -from typing import List, Dict, Any - -from lark import Token -from lark.tree import Meta -from lark.visitors import Transformer, Discard, _DiscardType, v_args - -from .reconstructor import reverse_quotes_within_interpolation - - -HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) -HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) - - -START_LINE = "__start_line__" -END_LINE = "__end_line__" - - -Attribute = namedtuple("Attribute", ("key", "value")) - - -# pylint: disable=missing-function-docstring,unused-argument -class DictTransformer(Transformer): - """Takes a syntax tree generated by the parser and - transforms it to a dict. 
- """ - - with_meta: bool - - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} - - def __init__(self, with_meta: bool = False): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self.with_meta = with_meta - super().__init__() - - def float_lit(self, args: List) -> float: - value = "".join([self.to_tf_inline(arg) for arg in args]) - if "e" in value: - return self.to_string_dollar(value) - return float(value) - - def int_lit(self, args: List) -> int: - return int("".join([self.to_tf_inline(arg) for arg in args])) - - def expr_term(self, args: List) -> Any: - args = self.strip_new_line_tokens(args) - - if args[0] == "true": - return True - if args[0] == "false": - return False - if args[0] == "null": - return None - - if args[0] == "(" and args[-1] == ")": - return "".join(str(arg) for arg in args) - - return args[0] - - def index_expr_term(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return f"{args[0]}{args[1]}" - - def index(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return f"[{args[0]}]" - - def get_attr_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def get_attr(self, args: List) -> str: - return f".{args[0]}" - - def attr_splat_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def attr_splat(self, args: List) -> str: - args_str = "".join(self.to_tf_inline(arg) for arg in args) - return f".*{args_str}" - - def full_splat_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def full_splat(self, args: List) -> str: - args_str = "".join(self.to_tf_inline(arg) for arg in args) - return f"[*]{args_str}" - - def tuple(self, args: List) -> List: - return [self.to_string_dollar(arg) for arg in self.strip_new_line_tokens(args)] - - def object_elem(self, args: List) -> Dict: - # This returns a dict with a single 
key/value pair to make it easier to merge these - # into a bigger dict that is returned by the "object" function - - key = str(args[0].children[0]) - if not re.match(r".*?(\${).*}.*", key): - # do not strip quotes of a interpolation string - key = self.strip_quotes(key) - - value = self.to_string_dollar(args[2]) - return {key: value} - - def object_elem_key_dot_accessor(self, args: List) -> str: - return "".join(args) - - def object_elem_key_expression(self, args: List) -> str: - return self.to_string_dollar("".join(args)) - - def object(self, args: List) -> Dict: - args = self.strip_new_line_tokens(args) - result: Dict[str, Any] = {} - for arg in args: - if ( - isinstance(arg, Token) and arg.type == "COMMA" - ): # skip optional comma at the end of object element - continue - - result.update(arg) - return result - - def function_call(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args_str = "" - if len(args) > 1: - args_str = ", ".join( - [self.to_tf_inline(arg) for arg in args[1] if arg is not Discard] - ) - return f"{args[0]}({args_str})" - - def provider_function_call(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args_str = "" - if len(args) > 5: - args_str = ", ".join( - [self.to_tf_inline(arg) for arg in args[5] if arg is not Discard] - ) - provider_func = "::".join([args[0], args[2], args[4]]) - return f"{provider_func}({args_str})" - - def arguments(self, args: List) -> List: - return self.process_nulls(args) - - @v_args(meta=True) - def block(self, meta: Meta, args: List) -> Dict: - *block_labels, block_body = args - result: Dict[str, Any] = block_body - if self.with_meta: - result.update( - { - START_LINE: meta.line, - END_LINE: meta.end_line, - } - ) - - # create nested dict. i.e. 
{label1: {label2: {labelN: result}}} - for label in reversed(block_labels): - label_str = self.strip_quotes(label) - result = {label_str: result} - - return result - - def attribute(self, args: List) -> Attribute: - key = str(args[0]) - if key.startswith('"') and key.endswith('"'): - key = key[1:-1] - value = self.to_string_dollar(args[2]) - return Attribute(key, value) - - def conditional(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args = self.process_nulls(args) - return f"{args[0]} ? {args[1]} : {args[2]}" - - def binary_op(self, args: List) -> str: - return " ".join( - [self.unwrap_string_dollar(self.to_tf_inline(arg)) for arg in args] - ) - - def unary_op(self, args: List) -> str: - args = self.process_nulls(args) - return "".join([self.to_tf_inline(arg) for arg in args]) - - def binary_term(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args = self.process_nulls(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def body(self, args: List) -> Dict[str, List]: - # See https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#bodies - # --- - # A body is a collection of associated attributes and blocks. - # - # An attribute definition assigns a value to a particular attribute - # name within a body. Each distinct attribute name may be defined no - # more than once within a single body. - # - # A block creates a child body that is annotated with a block type and - # zero or more block labels. Blocks create a structural hierarchy which - # can be interpreted by the calling application. - # --- - # - # There can be more than one child body with the same block type and - # labels. This means that all blocks (even when there is only one) - # should be transformed into lists of blocks. 
- args = self.strip_new_line_tokens(args) - attributes = set() - result: Dict[str, Any] = {} - for arg in args: - if isinstance(arg, Attribute): - if arg.key in result: - raise RuntimeError(f"{arg.key} already defined") - result[arg.key] = arg.value - attributes.add(arg.key) - else: - # This is a block. - for key, value in arg.items(): - key = str(key) - if key in result: - if key in attributes: - raise RuntimeError(f"{key} already defined") - result[key].append(value) - else: - result[key] = [value] - - return result - - def start(self, args: List) -> Dict: - args = self.strip_new_line_tokens(args) - return args[0] - - def binary_operator(self, args: List) -> str: - return str(args[0]) - - def heredoc_template(self, args: List) -> str: - match = HEREDOC_PATTERN.match(str(args[0])) - if not match: - raise RuntimeError(f"Invalid Heredoc token: {args[0]}") - - trim_chars = "\n\t " - result = match.group(2).rstrip(trim_chars) - return f'"{result}"' - - def heredoc_template_trim(self, args: List) -> str: - # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions - # This is a special version of heredocs that are declared with "<<-" - # This will calculate the minimum number of leading spaces in each line of a heredoc - # and then remove that number of spaces from each line - match = HEREDOC_TRIM_PATTERN.match(str(args[0])) - if not match: - raise RuntimeError(f"Invalid Heredoc token: {args[0]}") - - trim_chars = "\n\t " - text = match.group(2).rstrip(trim_chars) - lines = text.split("\n") - - # calculate the min number of leading spaces in each line - min_spaces = sys.maxsize - for line in lines: - leading_spaces = len(line) - len(line.lstrip(" ")) - min_spaces = min(min_spaces, leading_spaces) - - # trim off that number of leading spaces from each line - lines = [line[min_spaces:] for line in lines] - - return '"%s"' % "\n".join(lines) - - def new_line_or_comment(self, args: List) -> _DiscardType: - return Discard - - # def 
EQ(self, args: List): - # print("EQ", args) - # return args - - def for_tuple_expr(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) - return f"[{for_expr}]" - - def for_intro(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def for_cond(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def for_object_expr(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) - # doubled curly braces stands for inlining the braces - # and the third pair of braces is for the interpolation - # e.g. f"{2 + 2} {{2 + 2}}" == "4 {2 + 2}" - return f"{{{for_expr}}}" - - def string(self, args: List) -> str: - return '"' + "".join(args) + '"' - - def string_part(self, args: List) -> str: - value = self.to_tf_inline(args[0]) - if value.startswith('"') and value.endswith('"'): - value = value[1:-1] - return value - - def interpolation(self, args: List) -> str: - return '"${' + str(args[0]) + '}"' - - def strip_new_line_tokens(self, args: List) -> List: - """ - Remove new line and Discard tokens. 
- The parser will sometimes include these in the tree so we need to strip them out here - """ - return [arg for arg in args if arg != "\n" and arg is not Discard] - - def is_string_dollar(self, value: str) -> bool: - if not isinstance(value, str): - return False - return value.startswith("${") and value.endswith("}") - - def to_string_dollar(self, value: Any) -> Any: - """Wrap a string in ${ and }""" - if not isinstance(value, str): - return value - # if it's already wrapped, pass it unmodified - if self.is_string_dollar(value): - return value - - if value.startswith('"') and value.endswith('"'): - value = str(value)[1:-1] - return self.process_escape_sequences(value) - - if self.is_type_keyword(value): - return value - - return f"${{{value}}}" - - def unwrap_string_dollar(self, value: str): - if self.is_string_dollar(value): - return value[2:-1] - return value - - def strip_quotes(self, value: Any) -> Any: - """Remove quote characters from the start and end of a string""" - if isinstance(value, str): - if value.startswith('"') and value.endswith('"'): - value = str(value)[1:-1] - return self.process_escape_sequences(value) - return value - - def process_escape_sequences(self, value: str) -> str: - """Process HCL escape sequences within quoted template expressions.""" - if isinstance(value, str): - # normal escape sequences - value = value.replace("\\n", "\n") - value = value.replace("\\r", "\r") - value = value.replace("\\t", "\t") - value = value.replace('\\"', '"') - value = value.replace("\\\\", "\\") - - # we will leave Unicode escapes (\uNNNN and \UNNNNNNNN) untouched - # for now, but this method can be extended in the future - return value - - def process_nulls(self, args: List) -> List: - return ["null" if arg is None else arg for arg in args] - - def to_tf_inline(self, value: Any) -> str: - """ - Converts complex objects (e.g.) 
dicts to an "inline" HCL syntax - for use in function calls and ${interpolation} strings - """ - if isinstance(value, dict): - dict_v = json.dumps(value) - return reverse_quotes_within_interpolation(dict_v) - if isinstance(value, list): - value = [self.to_tf_inline(item) for item in value] - return f"[{', '.join(value)}]" - if isinstance(value, bool): - return "true" if value else "false" - if isinstance(value, str): - return value - if isinstance(value, (int, float)): - return str(value) - if value is None: - return "None" - - raise RuntimeError(f"Invalid type to convert to inline HCL: {type(value)}") - - def identifier(self, value: Any) -> Any: - # Making identifier a token by capitalizing it to IDENTIFIER - # seems to return a token object instead of the str - # So treat it like a regular rule - # In this case we just convert the whole thing to a string - return str(value[0]) diff --git a/hcl2/rule_transformer/editor.py b/hcl2/editor.py similarity index 100% rename from hcl2/rule_transformer/editor.py rename to hcl2/editor.py diff --git a/hcl2/rule_transformer/formatter.py b/hcl2/formatter.py similarity index 94% rename from hcl2/rule_transformer/formatter.py rename to hcl2/formatter.py index ad0247dc..205d2ddd 100644 --- a/hcl2/rule_transformer/formatter.py +++ b/hcl2/formatter.py @@ -2,21 +2,21 @@ from dataclasses import dataclass from typing import List -from hcl2.rule_transformer.rules.abstract import LarkElement -from hcl2.rule_transformer.rules.base import ( +from hcl2.rules.abstract import LarkElement +from hcl2.rules.base import ( StartRule, BlockRule, AttributeRule, BodyRule, ) -from hcl2.rule_transformer.rules.containers import ObjectRule, ObjectElemRule, TupleRule -from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule -from hcl2.rule_transformer.rules.for_expressions import ( +from hcl2.rules.containers import ObjectRule, ObjectElemRule, TupleRule +from hcl2.rules.expressions import ExprTermRule, ExpressionRule +from 
hcl2.rules.for_expressions import ( ForTupleExprRule, ForObjectExprRule, ) -from hcl2.rule_transformer.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA +from hcl2.rules.whitespace import NewLineOrCommentRule @dataclass diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index 78ba3ca6..63154efb 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -1,27 +1,29 @@ -start : body -body : (new_line_or_comment? (attribute | block))* new_line_or_comment? -attribute : identifier EQ expression -block : identifier (identifier | string)* new_line_or_comment? "{" body "}" -new_line_or_comment: ( NL_OR_COMMENT )+ +// ============================================================================ +// Terminals +// ============================================================================ + +// Whitespace and Comments NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ -identifier : NAME | IN | FOR | IF | FOR_EACH -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +// Keywords IF : "if" IN : "in" FOR : "for" FOR_EACH : "for_each" -?expression : expr_term | operation | conditional -conditional : expression "?" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment? expression +// Literals +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ +STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ +DECIMAL : "0".."9" +NEGATIVE_DECIMAL : "-" DECIMAL +EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ +INT_LITERAL: NEGATIVE_DECIMAL? DECIMAL+ +FLOAT_LITERAL: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? + | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) -?operation : unary_op | binary_op -!unary_op : ("-" | "!") expr_term -binary_op : expression binary_term new_line_or_comment? 
-!binary_operator : BINARY_OP -binary_term : binary_operator new_line_or_comment? expression -BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS +// Operators DOUBLE_EQ : "==" NEQ : "!=" LT : "<" @@ -35,74 +37,171 @@ PERCENT : "%" DOUBLE_AMP : "&&" DOUBLE_PIPE : "||" PLUS : "+" +NOT : "!" +QMARK : "?" + +// Punctuation LPAR : "(" RPAR : ")" +LBRACE : "{" +RBRACE : "}" +LSQB : "[" +RSQB : "]" COMMA : "," DOT : "." +EQ : /[ \t]*=(?!=|>)/ COLON : ":" +DBLQUOTE : "\"" + +// Interpolation +INTERP_START : "${" + +// Splat Operators +ATTR_SPLAT : ".*" +FULL_SPLAT_START : "[*]" + +// Special Operators +FOR_OBJECT_ARROW : "=>" +ELLIPSIS : "..." +COLONS: "::" + +// Heredocs +HEREDOC_TEMPLATE : /<<(?P<heredoc>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P<heredoc_trim>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ + +// Ignore whitespace (but not newlines, as they're significant in HCL) +%ignore /[ \t]+/ + +// ============================================================================ +// Rules +// ============================================================================ + +// Top-level structure +start : body + +// Body and basic constructs +body : (new_line_or_comment? (attribute | block))* new_line_or_comment? +attribute : identifier EQ expression +block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE + +// Whitespace and comments +new_line_or_comment: ( NL_OR_COMMENT )+ + +// Basic literals and identifiers +identifier : NAME +keyword: IN | FOR | IF | FOR_EACH +int_lit: INT_LITERAL +float_lit: FLOAT_LITERAL +string: DBLQUOTE string_part* DBLQUOTE +string_part: STRING_CHARS + | ESCAPED_INTERPOLATION + | interpolation + +// Expressions +?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? 
expression -> conditional + | or_expr +interpolation: INTERP_START expression RBRACE + +// Operator precedence ladder (lowest to highest) +// Each level uses left recursion for left-associativity. +// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain +// transformer compatibility with BinaryOpRule / BinaryTermRule / BinaryOperatorRule. + +// Logical OR +?or_expr : or_expr or_binary_term new_line_or_comment? -> binary_op + | and_expr +or_binary_term : or_binary_operator new_line_or_comment? and_expr -> binary_term +!or_binary_operator : DOUBLE_PIPE -> binary_operator + +// Logical AND +?and_expr : and_expr and_binary_term new_line_or_comment? -> binary_op + | eq_expr +and_binary_term : and_binary_operator new_line_or_comment? eq_expr -> binary_term +!and_binary_operator : DOUBLE_AMP -> binary_operator + +// Equality +?eq_expr : eq_expr eq_binary_term new_line_or_comment? -> binary_op + | rel_expr +eq_binary_term : eq_binary_operator new_line_or_comment? rel_expr -> binary_term +!eq_binary_operator : DOUBLE_EQ -> binary_operator + | NEQ -> binary_operator + +// Relational +?rel_expr : rel_expr rel_binary_term new_line_or_comment? -> binary_op + | add_expr +rel_binary_term : rel_binary_operator new_line_or_comment? add_expr -> binary_term +!rel_binary_operator : LT -> binary_operator + | GT -> binary_operator + | LEQ -> binary_operator + | GEQ -> binary_operator +// Additive +?add_expr : add_expr add_binary_term new_line_or_comment? -> binary_op + | mul_expr +add_binary_term : add_binary_operator new_line_or_comment? mul_expr -> binary_term +!add_binary_operator : PLUS -> binary_operator + | MINUS -> binary_operator + +// Multiplicative +?mul_expr : mul_expr mul_binary_term new_line_or_comment? -> binary_op + | unary_expr +mul_binary_term : mul_binary_operator new_line_or_comment? 
unary_expr -> binary_term +!mul_binary_operator : ASTERISK -> binary_operator + | SLASH -> binary_operator + | PERCENT -> binary_operator + +// Unary (highest precedence for operations) +?unary_expr : unary_op | expr_term +!unary_op : (MINUS | NOT) expr_term + +// Expression terms expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | float_lit | int_lit | string | tuple | object - | function_call - | index_expr_term - | get_attr_expr_term | identifier - | provider_function_call + | function_call | heredoc_template | heredoc_template_trim + | index_expr_term + | get_attr_expr_term | attr_splat_expr_term | full_splat_expr_term | for_tuple_expr | for_object_expr -string: "\"" string_part* "\"" -string_part: STRING_CHARS - | ESCAPED_INTERPOLATION - | interpolation -interpolation: "${" expression "}" -ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ -STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ - -int_lit : NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+ -!float_lit: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? - | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) ("." DECIMAL+)? (EXP_MARK) -NEGATIVE_DECIMAL : "-" DECIMAL -DECIMAL : "0".."9" -EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ -EQ : /[ \t]*=(?!=|>)/ - -tuple : "[" (new_line_or_comment* expression new_line_or_comment* ",")* (new_line_or_comment* expression)? new_line_or_comment* "]" -object : "{" new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* "}" +// Collections +tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB +object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? 
COMMA)) new_line_or_comment?)* RBRACE object_elem : object_elem_key ( EQ | COLON ) expression object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression object_elem_key_expression : LPAR expression RPAR object_elem_key_dot_accessor : identifier (DOT identifier)+ -heredoc_template : /<<(?P<heredoc>[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ -heredoc_template_trim : /<<-(?P<heredoc_trim>[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ +// Heredocs +heredoc_template : HEREDOC_TEMPLATE +heredoc_template_trim : HEREDOC_TEMPLATE_TRIM -function_call : identifier "(" new_line_or_comment? arguments? new_line_or_comment? ")" -arguments : (expression (new_line_or_comment* "," new_line_or_comment* expression)* ("," | "...")? new_line_or_comment*) -colons: "::" -provider_function_call: identifier colons identifier colons identifier "(" new_line_or_comment? arguments? new_line_or_comment? ")" +// Functions +function_call : identifier (COLONS identifier COLONS identifier)? LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) +// Indexing and attribute access index_expr_term : expr_term index get_attr_expr_term : expr_term get_attr attr_splat_expr_term : expr_term attr_splat full_splat_expr_term : expr_term full_splat -index : "[" new_line_or_comment? expression new_line_or_comment? "]" | "." DECIMAL+ -get_attr : "." identifier -attr_splat : ".*" get_attr* -full_splat : "[*]" (get_attr | index)* +?index : braces_index | short_index +braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB +short_index : DOT INT_LITERAL +get_attr : DOT identifier +attr_splat : ATTR_SPLAT (get_attr | index)* +full_splat : FULL_SPLAT_START (get_attr | index)* -FOR_OBJECT_ARROW : "=>" -!for_tuple_expr : "[" new_line_or_comment? for_intro new_line_or_comment? 
expression new_line_or_comment? for_cond? new_line_or_comment? "]" -!for_object_expr : "{" new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? "..."? new_line_or_comment? for_cond? new_line_or_comment? "}" -!for_intro : "for" new_line_or_comment? identifier ("," identifier new_line_or_comment?)? new_line_or_comment? "in" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment? -!for_cond : "if" new_line_or_comment? expression - -%ignore /[ \t]+/ +// For expressions +!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? RSQB +!for_object_expr : LBRACE new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? ELLIPSIS? new_line_or_comment? for_cond? new_line_or_comment? RBRACE +!for_intro : FOR new_line_or_comment? identifier (COMMA identifier new_line_or_comment?)? new_line_or_comment? IN new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? +!for_cond : IF new_line_or_comment? expression diff --git a/hcl2/parser.py b/hcl2/parser.py index 3e524736..a33fe5f8 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,31 +12,9 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "rule_transformer/hcl2.lark", + "hcl2.lark", parser="lalr", cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, propagate_positions=True, ) - - -@functools.lru_cache() -def reconstruction_parser() -> Lark: - """ - Build parser for transforming python structures into HCL2 text. - This is duplicated from `parser` because we need different options here for - the reconstructor. Please make sure changes are kept in sync between the two - if necessary. 
- """ - return Lark.open( - "rule_transformer/hcl2.lark", - parser="lalr", - # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed: - # - # https://github.com/lark-parser/lark/issues/1472 - # - # cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar - rel_to=__file__, - propagate_positions=True, - maybe_placeholders=False, # Needed for reconstruction - ) diff --git a/hcl2/rule_transformer/processor.py b/hcl2/processor.py similarity index 100% rename from hcl2/rule_transformer/processor.py rename to hcl2/processor.py diff --git a/hcl2/py.typed b/hcl2/py.typed deleted file mode 100644 index e69de29b..00000000 diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 555edcf6..e92f7040 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -1,739 +1,238 @@ -"""A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality""" - -import re -from typing import List, Dict, Callable, Optional, Union, Any, Tuple - -from lark import Lark, Tree -from lark.grammar import Terminal, Symbol -from lark.lexer import Token, PatternStr, TerminalDef -from lark.reconstruct import Reconstructor -from lark.tree_matcher import is_discarded_terminal -from lark.visitors import Transformer_InPlace -from regex import regex - -from hcl2.const import START_LINE_KEY, END_LINE_KEY -from hcl2.parser import reconstruction_parser - - -# function to remove the backslashes within interpolated portions -def reverse_quotes_within_interpolation(interp_s: str) -> str: - """ - A common operation is to `json.dumps(s)` where s is a string to output in - HCL. This is useful for automatically escaping any quotes within the - string, but this escapes quotes within interpolation incorrectly. This - method removes any erroneous escapes within interpolated segments of a - string. 
- """ - return re.sub(r"\$\{(.*)}", lambda m: m.group(0).replace('\\"', '"'), interp_s) - - -class WriteTokensAndMetaTransformer(Transformer_InPlace): - """ - Inserts discarded tokens into their correct place, according to the rules - of grammar, and annotates with metadata during reassembly. The metadata - tracked here include the terminal which generated a particular string - output, and the rule that that terminal was matched on. - - This is a modification of lark.reconstruct.WriteTokensTransformer - """ - - tokens: Dict[str, TerminalDef] - term_subs: Dict[str, Callable[[Symbol], str]] - - def __init__( - self, - tokens: Dict[str, TerminalDef], - term_subs: Dict[str, Callable[[Symbol], str]], - ) -> None: - super().__init__() - self.tokens = tokens - self.term_subs = term_subs - - def __default__(self, data, children, meta): - """ - This method is called for every token the transformer visits. - """ - - if not getattr(meta, "match_tree", False): - return Tree(data, children) - iter_args = iter( - [child[2] if isinstance(child, tuple) else child for child in children] - ) - to_write = [] - for sym in meta.orig_expansion: - if is_discarded_terminal(sym): - try: - value = self.term_subs[sym.name](sym) - except KeyError as exc: - token = self.tokens[sym.name] - if not isinstance(token.pattern, PatternStr): - raise NotImplementedError( - f"Reconstructing regexps not supported yet: {token}" - ) from exc - - value = token.pattern.value - - # annotate the leaf with the specific rule (data) and terminal - # (sym) it was generated from - to_write.append((data, sym, value)) - else: - item = next(iter_args) - if isinstance(item, list): - to_write += item - else: - if isinstance(item, Token): - # annotate the leaf with the specific rule (data) and - # terminal (sym) it was generated from - to_write.append((data, sym, item)) - else: - to_write.append(item) - - return to_write - - -class HCLReconstructor(Reconstructor): +from typing import List, Union + +from lark import Tree, 
Token +from hcl2.rules import tokens +from hcl2.rules.base import BlockRule +from hcl2.rules.for_expressions import ForIntroRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.strings import StringRule +from hcl2.rules.expressions import ( + ExprTermRule, + ConditionalRule, + UnaryOpRule, +) + + +class HCLReconstructor: """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" - def __init__( - self, - parser: Lark, - term_subs: Optional[Dict[str, Callable[[Symbol], str]]] = None, - ): - Reconstructor.__init__(self, parser, term_subs) - - self.write_tokens: WriteTokensAndMetaTransformer = ( - WriteTokensAndMetaTransformer( - {token.name: token for token in self.tokens}, term_subs or {} - ) - ) - - # these variables track state during reconstruction to enable us to make - # informed decisions about formatting output. They are primarily used - # by the _should_add_space(...) method. - self._last_char_space = True - self._last_terminal: Union[Terminal, None] = None - self._last_rule: Union[Tree, Token, None] = None - self._deferred_item = None - - def should_be_wrapped_in_spaces(self, terminal: Terminal) -> bool: - """Whether given terminal should be wrapped in spaces""" - return terminal.name in { - "IF", - "IN", - "FOR", - "FOR_EACH", - "FOR_OBJECT_ARROW", - "COLON", - "QMARK", - "BINARY_OP", - } - - def _is_equals_sign(self, terminal) -> bool: - return ( - isinstance(self._last_rule, Token) - and self._last_rule.value in ("attribute", "object_elem") - and self._last_terminal == Terminal("EQ") - and terminal != Terminal("NL_OR_COMMENT") - ) - - # pylint: disable=too-many-branches, too-many-return-statements - def _should_add_space(self, rule, current_terminal, is_block_label: bool = False): - """ - This method documents the situations in which we add space around - certain tokens while reconstructing the generated HCL. 
- - Additional rules can be added here if the generated HCL has - improper whitespace (affecting parse OR affecting ability to perfectly - reconstruct a file down to the whitespace level.) - - It has the following information available to make its decision: - - - the last token (terminal) we output - - the last rule that token belonged to - - the current token (terminal) we're about to output - - the rule the current token belongs to - - This should be sufficient to make a spacing decision. - """ - - # we don't need to add multiple spaces - if self._last_char_space: - return False + _binary_op_types = { + "DOUBLE_EQ", + "NEQ", + "LT", + "GT", + "LEQ", + "GEQ", + "MINUS", + "ASTERISK", + "SLASH", + "PERCENT", + "DOUBLE_AMP", + "DOUBLE_PIPE", + "PLUS", + } - # we don't add a space at the start of the file - if not self._last_terminal or not self._last_rule: + def __init__(self): + self._reset_state() + + def _reset_state(self): + """State tracking for formatting decisions""" + self._last_was_space = True + self._current_indent = 0 + self._last_token_name = None + self._last_rule_name = None + self._in_parentheses = False + self._in_object = False + self._in_tuple = False + + def _should_add_space_before( + self, current_node: Union[Tree, Token], parent_rule_name: str = None + ) -> bool: + """Determine if we should add a space before the current token/rule.""" + + # Don't add space if we already have one + if self._last_was_space: return False - if self._is_equals_sign(current_terminal): - return True + # Don't add space at the beginning + if self._last_token_name is None: + return False - if is_block_label: - pass - # print(rule, self._last_rule, current_terminal, self._last_terminal) + if isinstance(current_node, Token): + token_type = current_node.type - if is_block_label and isinstance(rule, Token) and rule.value == "string": + # Space before '{' in blocks if ( - current_terminal == self._last_terminal == Terminal("DBLQUOTE") - or current_terminal == 
Terminal("DBLQUOTE") - and self._last_terminal == Terminal("IDENTIFIER") + token_type == tokens.LBRACE.lark_name() + and parent_rule_name == BlockRule.lark_name() ): - # print("true") return True - # if we're in a ternary or binary operator, add space around the operator - if ( - isinstance(rule, Token) - and rule.value - in [ - "conditional", - "binary_operator", - ] - and self.should_be_wrapped_in_spaces(current_terminal) - ): - return True - - # if we just left a ternary or binary operator, add space around the - # operator unless there's a newline already - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value - in [ - "conditional", - "binary_operator", - ] - and self.should_be_wrapped_in_spaces(self._last_terminal) - and current_terminal != Terminal("NL_OR_COMMENT") - ): - return True - - # if we're in a for or if statement and find a keyword, add a space - if ( - isinstance(rule, Token) - and rule.value - in [ - "for_object_expr", - "for_cond", - "for_intro", - ] - and self.should_be_wrapped_in_spaces(current_terminal) - ): - return True - - # if we've just left a for or if statement and find a keyword, add a - # space, unless we have a newline - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value - in [ - "for_object_expr", - "for_cond", - "for_intro", - ] - and self.should_be_wrapped_in_spaces(self._last_terminal) - and current_terminal != Terminal("NL_OR_COMMENT") - ): - return True - - # if we're in a block - if (isinstance(rule, Token) and rule.value == "block") or ( - isinstance(rule, str) and re.match(r"^__block_(star|plus)_.*", rule) - ): - # always add space before the starting brace - if current_terminal == Terminal("LBRACE"): + # Space around Conditional Expression operators + if ( + parent_rule_name == ConditionalRule.lark_name() + and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + or self._last_token_name + in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + ): return True - # always 
add space before the closing brace - if current_terminal == Terminal( - "RBRACE" - ) and self._last_terminal != Terminal("LBRACE"): + # Space after + if ( + parent_rule_name == ForIntroRule.lark_name() + and token_type == tokens.COLON.lark_name() + ): + return True - # always add space between string literals - if current_terminal == Terminal("STRING_CHARS"): + # Space after commas in tuples and function arguments... + if self._last_token_name == tokens.COMMA.lark_name(): + # ... except for last comma + if token_type == tokens.RSQB.lark_name(): + return False return True - # if we just opened a block, add a space, unless the block is empty - # or has a newline - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value == "block" - and self._last_terminal == Terminal("LBRACE") - and current_terminal not in [Terminal("RBRACE"), Terminal("NL_OR_COMMENT")] - ): - return True - - # if we're in a tuple or function arguments (this rule matches commas between items) - if isinstance(self._last_rule, str) and re.match( - r"^__(tuple|arguments)_(star|plus)_.*", self._last_rule - ): - - # string literals, decimals, and identifiers should always be - # preceded by a space if they're following a comma in a tuple or - # function arg - if current_terminal in [ - Terminal("DBLQUOTE"), - Terminal("DECIMAL"), - Terminal("NAME"), - Terminal("NEGATIVE_DECIMAL"), + if token_type in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + tokens.ELLIPSIS.lark_name(), ]: return True - # the catch-all case, we're not sure, so don't add a space - return False - - def _reconstruct(self, tree, is_block_label=False): - unreduced_tree = self.match_tree(tree, tree.data) - res = self.write_tokens.transform(unreduced_tree) - for item in res: - # any time we encounter a child tree, we recurse - if isinstance(item, Tree): - yield from self._reconstruct( - item, (unreduced_tree.data == "block" and item.data != "body") - ) - - # every leaf should be a tuple, which 
contains information about - # which terminal the leaf represents - elif isinstance(item, tuple): - rule, terminal, value = item - - # first, handle any deferred items - if self._deferred_item is not None: - ( - deferred_rule, - deferred_terminal, - deferred_value, - ) = self._deferred_item - - # if we deferred a comma and the next character ends a - # parenthesis or block, we can throw it out - if deferred_terminal == Terminal("COMMA") and terminal in [ - Terminal("RPAR"), - Terminal("RBRACE"), - ]: - pass - # in any other case, we print the deferred item - else: - yield deferred_value - - # and do our bookkeeping - self._last_terminal = deferred_terminal - self._last_rule = deferred_rule - if deferred_value and not deferred_value[-1].isspace(): - self._last_char_space = False - - # clear the deferred item - self._deferred_item = None - - # potentially add a space before the next token - if self._should_add_space(rule, terminal, is_block_label): - yield " " - self._last_char_space = True - - # potentially defer the item if needed - if terminal in [Terminal("COMMA")]: - self._deferred_item = item - else: - # otherwise print the next token - yield value - - # and do our bookkeeping so we can make an informed - # decision about formatting next time - self._last_terminal = terminal - self._last_rule = rule - if value: - self._last_char_space = value[-1].isspace() - - else: - raise RuntimeError(f"Unknown bare token type: {item}") - - def reconstruct(self, tree, postproc=None, insert_spaces=False): - """Convert a Lark.Tree AST back into a string representation of HCL.""" - return Reconstructor.reconstruct( - self, - tree, - postproc, - insert_spaces, - ) - - -class HCLReverseTransformer: - """ - The reverse of hcl2.transformer.DictTransformer. This method attempts to - convert a dict back into a working AST, which can be written back out. 
- """ - - @staticmethod - def _name_to_identifier(name: str) -> Tree: - """Converts a string to a NAME token within an identifier rule.""" - return Tree(Token("RULE", "identifier"), [Token("NAME", name)]) - - @staticmethod - def _escape_interpolated_str(interp_s: str) -> str: - if interp_s.strip().startswith("<<-") or interp_s.strip().startswith("<<"): - # For heredoc strings, preserve their format exactly - return reverse_quotes_within_interpolation(interp_s) - # Escape backslashes first (very important to do this first) - escaped = interp_s.replace("\\", "\\\\") - # Escape quotes - escaped = escaped.replace('"', '\\"') - # Escape control characters - escaped = escaped.replace("\n", "\\n") - escaped = escaped.replace("\r", "\\r") - escaped = escaped.replace("\t", "\\t") - escaped = escaped.replace("\b", "\\b") - escaped = escaped.replace("\f", "\\f") - # find each interpolation within the string and remove the backslashes - interp_s = reverse_quotes_within_interpolation(f"{escaped}") - return interp_s - - @staticmethod - def _block_has_label(block: dict) -> bool: - return len(block.keys()) == 1 - - def __init__(self): - pass - - def transform(self, hcl_dict: dict) -> Tree: - """Given a dict, return a Lark.Tree representing the HCL AST.""" - level = 0 - body = self._transform_dict_to_body(hcl_dict, level) - start = Tree(Token("RULE", "start"), [body]) - return start - - @staticmethod - def _is_string_wrapped_tf(interp_s: str) -> bool: - """ - Determines whether a string is a complex HCL data structure - wrapped in ${ interpolation } characters. 
- """ - if not interp_s.startswith("${") or not interp_s.endswith("}"): - return False + if ( + self._last_token_name + in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + ] + and token_type != "NL_OR_COMMENT" + ): + return True - nested_tokens = [] - for match in re.finditer(r"\$?\{|}", interp_s): - if match.group(0) in ["${", "{"]: - nested_tokens.append(match.group(0)) - elif match.group(0) == "}": - nested_tokens.pop() - - # if we exit ${ interpolation } before the end of the string, - # this interpolated string has string parts and can't represent - # a valid HCL expression on its own (without quotes) - if len(nested_tokens) == 0 and match.end() != len(interp_s): - return False + # Space around for_object arrow + if tokens.FOR_OBJECT_ARROW.lark_name() in [ + token_type, + self._last_token_name, + ]: + return True - return True - - @classmethod - def _unwrap_interpolation(cls, value: str) -> str: - if cls._is_string_wrapped_tf(value): - return value[2:-1] - return value - - def _newline(self, level: int, count: int = 1) -> Tree: - return Tree( - Token("RULE", "new_line_or_comment"), - [Token("NL_OR_COMMENT", f"\n{' ' * level}") for _ in range(count)], - ) - - def _build_string_rule(self, string: str, level: int = 0) -> Tree: - # grammar in hcl2.lark defines that a string is built of any number of string parts, - # each string part can be either interpolation expression, escaped interpolation string - # or regular string - # this method build hcl2 string rule based on arbitrary string, - # splitting such string into individual parts and building a lark tree out of them - # - result = [] + # Space after ellipsis in function arguments + if self._last_token_name == tokens.ELLIPSIS.lark_name(): + return True - pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") - parts = [part for part in pattern.split(string) if part != ""] - # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] - # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] - - for part in parts: - if part.startswith("$${") and part.endswith("}"): - result.append(Token("ESCAPED_INTERPOLATION", part)) - - # unwrap interpolation expression and recurse into it - elif part.startswith("${") and part.endswith("}"): - part = part[2:-1] - if part.startswith('"') and part.endswith('"'): - part = part[1:-1] - part = self._transform_value_to_expr_term(part, level) - else: - part = Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "identifier"), [Token("NAME", part)])], - ) - - result.append(Tree(Token("RULE", "interpolation"), [part])) - - else: - result.append(Token("STRING_CHARS", part)) - - result = [Tree(Token("RULE", "string_part"), [element]) for element in result] - return Tree(Token("RULE", "string"), result) - - def _is_block(self, value: Any) -> bool: - if isinstance(value, dict): - block_body = value - if START_LINE_KEY in block_body.keys() or END_LINE_KEY in block_body.keys(): + if tokens.EQ.lark_name() in [token_type, self._last_token_name]: return True - try: - # if block is labeled, actual body might be nested - # pylint: disable=W0612 - block_label, block_body = next(iter(value.items())) - except StopIteration: - # no more potential labels = nothing more to check + # Don't add space around operator tokens inside unary_op + if parent_rule_name == UnaryOpRule.lark_name(): return False - return self._is_block(block_body) + if ( + token_type in self._binary_op_types + or self._last_token_name in self._binary_op_types + ): + return True + + elif isinstance(current_node, Tree): + rule_name = current_node.data - if isinstance(value, list): - if len(value) > 0: - return self._is_block(value[0]) + if parent_rule_name == BlockRule.lark_name(): + # Add space between multiple string/identifier labels in blocks + if rule_name in [ + StringRule.lark_name(), + 
IdentifierRule.lark_name(), + ] and self._last_rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ]: + return True return False - def _calculate_block_labels(self, block: dict) -> Tuple[List[str], dict]: - # if block doesn't have a label - if len(block.keys()) != 1: - return [], block - - # otherwise, find the label - curr_label = list(block)[0] - potential_body = block[curr_label] - - # __start_line__ and __end_line__ metadata are not labels - if ( - START_LINE_KEY in potential_body.keys() - or END_LINE_KEY in potential_body.keys() - ): - return [curr_label], potential_body - - # recurse and append the label - next_label, block_body = self._calculate_block_labels(potential_body) - return [curr_label] + next_label, block_body - - # pylint:disable=R0914 - def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree: - # we add a newline at the top of a body within a block, not the root body - # >2 here is to ignore the __start_line__ and __end_line__ metadata - if level > 0 and len(hcl_dict) > 2: - children = [self._newline(level)] + def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: + """Recursively reconstruct a Tree node into HCL text fragments.""" + result = [] + rule_name = tree.data + + if rule_name == UnaryOpRule.lark_name(): + for i, child in enumerate(tree.children): + result.extend(self._reconstruct_node(child, rule_name)) + if i == 0: + # Suppress space between unary operator and its operand + self._last_was_space = True + + elif rule_name == ExprTermRule.lark_name(): + # Check if parenthesized + if ( + len(tree.children) >= 3 + and isinstance(tree.children[0], Token) + and tree.children[0].type == tokens.LPAR.lark_name() + and isinstance(tree.children[-1], Token) + and tree.children[-1].type == tokens.RPAR.lark_name() + ): + self._in_parentheses = True + + for child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + self._in_parentheses = False + + else: + for 
child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + if self._should_add_space_before(tree, parent_rule_name): + result.insert(0, " ") + + # Update state tracking + self._last_rule_name = rule_name + if result: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: + """Reconstruct a Token node into HCL text fragments.""" + result = str(token.value) + if self._should_add_space_before(token, parent_rule_name): + result = " " + result + + self._last_token_name = token.type + if len(token) != 0: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_node( + self, node: Union[Tree, Token], parent_rule_name: str = None + ) -> List[str]: + """Reconstruct any node (Tree or Token) into HCL text fragments.""" + if isinstance(node, Tree): + return self._reconstruct_tree(node, parent_rule_name) + elif isinstance(node, Token): + return [self._reconstruct_token(node, parent_rule_name)] else: - children = [] - - # iterate through each attribute or sub-block of this block - for key, value in hcl_dict.items(): - if key in [START_LINE_KEY, END_LINE_KEY]: - continue - - # construct the identifier, whether that be a block type name or an attribute key - identifier_name = self._name_to_identifier(key) - - # first, check whether the value is a "block" - if self._is_block(value): - for block_v in value: - block_labels, block_body_dict = self._calculate_block_labels( - block_v - ) - block_label_trees = [ - self._build_string_rule(block_label, level) - for block_label in block_labels - ] - block_body = self._transform_dict_to_body( - block_body_dict, level + 1 - ) - - # create our actual block to add to our own body - block = Tree( - Token("RULE", "block"), - [identifier_name] + block_label_trees + [block_body], - ) - children.append(block) - # add empty line after 
block - new_line = self._newline(level - 1) - # add empty line with indentation for next element in the block - new_line.children.append(self._newline(level).children[0]) - - children.append(new_line) - - # if the value isn't a block, it's an attribute - else: - expr_term = self._transform_value_to_expr_term(value, level) - attribute = Tree( - Token("RULE", "attribute"), - [identifier_name, Token("EQ", " ="), expr_term], - ) - children.append(attribute) - children.append(self._newline(level)) - - # since we're leaving a block body here, reduce the indentation of the - # final newline if it exists - if ( - len(children) > 0 - and isinstance(children[-1], Tree) - and children[-1].data.type == "RULE" - and children[-1].data.value == "new_line_or_comment" - ): - children[-1] = self._newline(level - 1) - - return Tree(Token("RULE", "body"), children) - - # pylint: disable=too-many-branches, too-many-return-statements too-many-statements - def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]: - """Transforms a value from a dictionary into an "expr_term" (a value in HCL2) - - Anything passed to this function is treated "naively". Any lists passed - are assumed to be tuples, and any dicts passed are assumed to be objects. - No more checks will be performed for either to see if they are "blocks" - as this check happens in `_transform_dict_to_body`. 
- """ - - # for lists, recursively turn the child elements into expr_terms and - # store within a tuple - if isinstance(value, list): - tuple_tree = Tree( - Token("RULE", "tuple"), - [ - self._transform_value_to_expr_term(tuple_v, level) - for tuple_v in value - ], - ) - return Tree(Token("RULE", "expr_term"), [tuple_tree]) - - if value is None: - return Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "identifier"), [Token("NAME", "null")])], - ) - - # for dicts, recursively turn the child k/v pairs into object elements - # and store within an object - if isinstance(value, dict): - elements = [] - - # if the object has elements, put it on a newline - if len(value) > 0: - elements.append(self._newline(level + 1)) - - # iterate through the items and add them to the object - for i, (k, dict_v) in enumerate(value.items()): - if k in [START_LINE_KEY, END_LINE_KEY]: - continue - - value_expr_term = self._transform_value_to_expr_term(dict_v, level + 1) - k = self._unwrap_interpolation(k) - elements.append( - Tree( - Token("RULE", "object_elem"), - [ - Tree( - Token("RULE", "object_elem_key"), - [Tree(Token("RULE", "identifier"), [Token("NAME", k)])], - ), - Token("EQ", " ="), - value_expr_term, - ], - ) - ) - - # add indentation appropriately - if i < len(value) - 1: - elements.append(self._newline(level + 1)) - else: - elements.append(self._newline(level)) - return Tree( - Token("RULE", "expr_term"), [Tree(Token("RULE", "object"), elements)] - ) - - # treat booleans appropriately - if isinstance(value, bool): - return Tree( - Token("RULE", "expr_term"), - [ - Tree( - Token("RULE", "identifier"), - [Token("NAME", "true" if value else "false")], - ) - ], - ) - - # store integers as literals, digit by digit - if isinstance(value, int): - return Tree( - Token("RULE", "expr_term"), - [ - Tree( - Token("RULE", "int_lit"), - [Token("DECIMAL", digit) for digit in str(value)], - ) - ], - ) - - if isinstance(value, float): - value = str(value) - literal = [] - - if 
value[0] == "-": - # pop two first chars - minus and a digit - literal.append(Token("NEGATIVE_DECIMAL", value[:2])) - value = value[2:] - - while value != "": - char = value[0] - - if char == ".": - # current char marks beginning of decimal part: pop all remaining chars and end the loop - literal.append(Token("DOT", char)) - literal.extend(Token("DECIMAL", char) for char in value[1:]) - break - - if char == "e": - # current char marks beginning of e-notation: pop all remaining chars and end the loop - literal.append(Token("EXP_MARK", value)) - break - - literal.append(Token("DECIMAL", char)) - value = value[1:] - - return Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "float_lit"), literal)], - ) - - # store strings as single literals - if isinstance(value, str): - # potentially unpack a complex syntax structure - if self._is_string_wrapped_tf(value): - # we have to unpack it by parsing it - wrapped_value = re.match(r"\$\{(.*)}", value).group(1) # type:ignore - ast = reconstruction_parser().parse(f"value = {wrapped_value}") - - if ast.data != Token("RULE", "start"): - raise RuntimeError("Token must be `start` RULE") - - body = ast.children[0] - if body.data != Token("RULE", "body"): - raise RuntimeError("Token must be `body` RULE") - - attribute = body.children[0] - if attribute.data != Token("RULE", "attribute"): - raise RuntimeError("Token must be `attribute` RULE") - - if attribute.children[1] != Token("EQ", " ="): - raise RuntimeError("Token must be `EQ (=)` rule") - - parsed_value = attribute.children[2] - return parsed_value - - # otherwise it's a string - return Tree( - Token("RULE", "expr_term"), - [self._build_string_rule(self._escape_interpolated_str(value), level)], - ) - - # otherwise, we don't know the type - raise RuntimeError(f"Unknown type to transform {type(value)}") + # Fallback: convert to string + return [str(node)] + + def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: + """Convert a Lark.Tree AST back into 
a string representation of HCL.""" + # Reset state + self._reset_state() + + # Reconstruct the tree + fragments = self._reconstruct_node(tree) + + # Join fragments and apply post-processing + result = "".join(fragments) + + if postproc: + result = postproc(result) + + # Ensure file ends with newline + if result and not result.endswith("\n"): + result += "\n" + + return result diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark deleted file mode 100644 index 63154efb..00000000 --- a/hcl2/rule_transformer/hcl2.lark +++ /dev/null @@ -1,207 +0,0 @@ -// ============================================================================ -// Terminals -// ============================================================================ - -// Whitespace and Comments -NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ - -// Keywords -IF : "if" -IN : "in" -FOR : "for" -FOR_EACH : "for_each" - - -// Literals -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ -ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ -STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ -DECIMAL : "0".."9" -NEGATIVE_DECIMAL : "-" DECIMAL -EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ -INT_LITERAL: NEGATIVE_DECIMAL? DECIMAL+ -FLOAT_LITERAL: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? - | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) - -// Operators -DOUBLE_EQ : "==" -NEQ : "!=" -LT : "<" -GT : ">" -LEQ : "<=" -GEQ : ">=" -MINUS : "-" -ASTERISK : "*" -SLASH : "/" -PERCENT : "%" -DOUBLE_AMP : "&&" -DOUBLE_PIPE : "||" -PLUS : "+" -NOT : "!" -QMARK : "?" - -// Punctuation -LPAR : "(" -RPAR : ")" -LBRACE : "{" -RBRACE : "}" -LSQB : "[" -RSQB : "]" -COMMA : "," -DOT : "." -EQ : /[ \t]*=(?!=|>)/ -COLON : ":" -DBLQUOTE : "\"" - -// Interpolation -INTERP_START : "${" - -// Splat Operators -ATTR_SPLAT : ".*" -FULL_SPLAT_START : "[*]" - -// Special Operators -FOR_OBJECT_ARROW : "=>" -ELLIPSIS : "..." 
-COLONS: "::" - -// Heredocs -HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ -HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ - -// Ignore whitespace (but not newlines, as they're significant in HCL) -%ignore /[ \t]+/ - -// ============================================================================ -// Rules -// ============================================================================ - -// Top-level structure -start : body - -// Body and basic constructs -body : (new_line_or_comment? (attribute | block))* new_line_or_comment? -attribute : identifier EQ expression -block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE - -// Whitespace and comments -new_line_or_comment: ( NL_OR_COMMENT )+ - -// Basic literals and identifiers -identifier : NAME -keyword: IN | FOR | IF | FOR_EACH -int_lit: INT_LITERAL -float_lit: FLOAT_LITERAL -string: DBLQUOTE string_part* DBLQUOTE -string_part: STRING_CHARS - | ESCAPED_INTERPOLATION - | interpolation - -// Expressions -?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -> conditional - | or_expr -interpolation: INTERP_START expression RBRACE - -// Operator precedence ladder (lowest to highest) -// Each level uses left recursion for left-associativity. -// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain -// transformer compatibility with BinaryOpRule / BinaryTermRule / BinaryOperatorRule. - -// Logical OR -?or_expr : or_expr or_binary_term new_line_or_comment? -> binary_op - | and_expr -or_binary_term : or_binary_operator new_line_or_comment? and_expr -> binary_term -!or_binary_operator : DOUBLE_PIPE -> binary_operator - -// Logical AND -?and_expr : and_expr and_binary_term new_line_or_comment? -> binary_op - | eq_expr -and_binary_term : and_binary_operator new_line_or_comment? 
eq_expr -> binary_term -!and_binary_operator : DOUBLE_AMP -> binary_operator - -// Equality -?eq_expr : eq_expr eq_binary_term new_line_or_comment? -> binary_op - | rel_expr -eq_binary_term : eq_binary_operator new_line_or_comment? rel_expr -> binary_term -!eq_binary_operator : DOUBLE_EQ -> binary_operator - | NEQ -> binary_operator - -// Relational -?rel_expr : rel_expr rel_binary_term new_line_or_comment? -> binary_op - | add_expr -rel_binary_term : rel_binary_operator new_line_or_comment? add_expr -> binary_term -!rel_binary_operator : LT -> binary_operator - | GT -> binary_operator - | LEQ -> binary_operator - | GEQ -> binary_operator - -// Additive -?add_expr : add_expr add_binary_term new_line_or_comment? -> binary_op - | mul_expr -add_binary_term : add_binary_operator new_line_or_comment? mul_expr -> binary_term -!add_binary_operator : PLUS -> binary_operator - | MINUS -> binary_operator - -// Multiplicative -?mul_expr : mul_expr mul_binary_term new_line_or_comment? -> binary_op - | unary_expr -mul_binary_term : mul_binary_operator new_line_or_comment? unary_expr -> binary_term -!mul_binary_operator : ASTERISK -> binary_operator - | SLASH -> binary_operator - | PERCENT -> binary_operator - -// Unary (highest precedence for operations) -?unary_expr : unary_op | expr_term -!unary_op : (MINUS | NOT) expr_term - -// Expression terms -expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR - | float_lit - | int_lit - | string - | tuple - | object - | identifier - | function_call - | heredoc_template - | heredoc_template_trim - | index_expr_term - | get_attr_expr_term - | attr_splat_expr_term - | full_splat_expr_term - | for_tuple_expr - | for_object_expr - -// Collections -tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB -object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? 
COMMA)) new_line_or_comment?)* RBRACE -object_elem : object_elem_key ( EQ | COLON ) expression -object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression -object_elem_key_expression : LPAR expression RPAR -object_elem_key_dot_accessor : identifier (DOT identifier)+ - -// Heredocs -heredoc_template : HEREDOC_TEMPLATE -heredoc_template_trim : HEREDOC_TEMPLATE_TRIM - -// Functions -function_call : identifier (COLONS identifier COLONS identifier)? LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR -arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) - -// Indexing and attribute access -index_expr_term : expr_term index -get_attr_expr_term : expr_term get_attr -attr_splat_expr_term : expr_term attr_splat -full_splat_expr_term : expr_term full_splat -?index : braces_index | short_index -braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB -short_index : DOT INT_LITERAL -get_attr : DOT identifier -attr_splat : ATTR_SPLAT (get_attr | index)* -full_splat : FULL_SPLAT_START (get_attr | index)* - -// For expressions -!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? RSQB -!for_object_expr : LBRACE new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? ELLIPSIS? new_line_or_comment? for_cond? new_line_or_comment? RBRACE -!for_intro : FOR new_line_or_comment? identifier (COMMA identifier new_line_or_comment?)? new_line_or_comment? IN new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? -!for_cond : IF new_line_or_comment? 
expression diff --git a/hcl2/rule_transformer/json.py b/hcl2/rule_transformer/json.py deleted file mode 100644 index 647b6683..00000000 --- a/hcl2/rule_transformer/json.py +++ /dev/null @@ -1,12 +0,0 @@ -from json import JSONEncoder -from typing import Any - -from hcl2.rule_transformer.rules.abstract import LarkRule - - -class LarkEncoder(JSONEncoder): - def default(self, obj: Any): - if isinstance(obj, LarkRule): - return obj.serialize() - else: - return super().default(obj) diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py deleted file mode 100644 index 099beead..00000000 --- a/hcl2/rule_transformer/reconstructor.py +++ /dev/null @@ -1,238 +0,0 @@ -from typing import List, Union - -from lark import Tree, Token -from hcl2.rule_transformer.rules import tokens -from hcl2.rule_transformer.rules.base import BlockRule -from hcl2.rule_transformer.rules.for_expressions import ForIntroRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.expressions import ( - ExprTermRule, - ConditionalRule, - UnaryOpRule, -) - - -class HCLReconstructor: - """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" - - _binary_op_types = { - "DOUBLE_EQ", - "NEQ", - "LT", - "GT", - "LEQ", - "GEQ", - "MINUS", - "ASTERISK", - "SLASH", - "PERCENT", - "DOUBLE_AMP", - "DOUBLE_PIPE", - "PLUS", - } - - def __init__(self): - self._reset_state() - - def _reset_state(self): - """State tracking for formatting decisions""" - self._last_was_space = True - self._current_indent = 0 - self._last_token_name = None - self._last_rule_name = None - self._in_parentheses = False - self._in_object = False - self._in_tuple = False - - def _should_add_space_before( - self, current_node: Union[Tree, Token], parent_rule_name: str = None - ) -> bool: - """Determine if we should add a space before the current token/rule.""" 
- - # Don't add space if we already have one - if self._last_was_space: - return False - - # Don't add space at the beginning - if self._last_token_name is None: - return False - - if isinstance(current_node, Token): - token_type = current_node.type - - # Space before '{' in blocks - if ( - token_type == tokens.LBRACE.lark_name() - and parent_rule_name == BlockRule.lark_name() - ): - return True - - # Space around Conditional Expression operators - if ( - parent_rule_name == ConditionalRule.lark_name() - and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] - or self._last_token_name - in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] - ): - return True - - # Space after - if ( - parent_rule_name == ForIntroRule.lark_name() - and token_type == tokens.COLON.lark_name() - ): - - return True - - # Space after commas in tuples and function arguments... - if self._last_token_name == tokens.COMMA.lark_name(): - # ... except for last comma - if token_type == tokens.RSQB.lark_name(): - return False - return True - - if token_type in [ - tokens.FOR.lark_name(), - tokens.IN.lark_name(), - tokens.IF.lark_name(), - tokens.ELLIPSIS.lark_name(), - ]: - return True - - if ( - self._last_token_name - in [ - tokens.FOR.lark_name(), - tokens.IN.lark_name(), - tokens.IF.lark_name(), - ] - and token_type != "NL_OR_COMMENT" - ): - return True - - # Space around for_object arrow - if tokens.FOR_OBJECT_ARROW.lark_name() in [ - token_type, - self._last_token_name, - ]: - return True - - # Space after ellipsis in function arguments - if self._last_token_name == tokens.ELLIPSIS.lark_name(): - return True - - if tokens.EQ.lark_name() in [token_type, self._last_token_name]: - return True - - # Don't add space around operator tokens inside unary_op - if parent_rule_name == UnaryOpRule.lark_name(): - return False - - if ( - token_type in self._binary_op_types - or self._last_token_name in self._binary_op_types - ): - return True - - elif isinstance(current_node, Tree): - 
rule_name = current_node.data - - if parent_rule_name == BlockRule.lark_name(): - # Add space between multiple string/identifier labels in blocks - if rule_name in [ - StringRule.lark_name(), - IdentifierRule.lark_name(), - ] and self._last_rule_name in [ - StringRule.lark_name(), - IdentifierRule.lark_name(), - ]: - return True - - return False - - def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: - """Recursively reconstruct a Tree node into HCL text fragments.""" - result = [] - rule_name = tree.data - - if rule_name == UnaryOpRule.lark_name(): - for i, child in enumerate(tree.children): - result.extend(self._reconstruct_node(child, rule_name)) - if i == 0: - # Suppress space between unary operator and its operand - self._last_was_space = True - - elif rule_name == ExprTermRule.lark_name(): - # Check if parenthesized - if ( - len(tree.children) >= 3 - and isinstance(tree.children[0], Token) - and tree.children[0].type == tokens.LPAR.lark_name() - and isinstance(tree.children[-1], Token) - and tree.children[-1].type == tokens.RPAR.lark_name() - ): - self._in_parentheses = True - - for child in tree.children: - result.extend(self._reconstruct_node(child, rule_name)) - - self._in_parentheses = False - - else: - for child in tree.children: - result.extend(self._reconstruct_node(child, rule_name)) - - if self._should_add_space_before(tree, parent_rule_name): - result.insert(0, " ") - - # Update state tracking - self._last_rule_name = rule_name - if result: - self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") - - return result - - def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: - """Reconstruct a Token node into HCL text fragments.""" - result = str(token.value) - if self._should_add_space_before(token, parent_rule_name): - result = " " + result - - self._last_token_name = token.type - if len(token) != 0: - self._last_was_space = result[-1].endswith(" ") or 
result[-1].endswith("\n") - - return result - - def _reconstruct_node( - self, node: Union[Tree, Token], parent_rule_name: str = None - ) -> List[str]: - """Reconstruct any node (Tree or Token) into HCL text fragments.""" - if isinstance(node, Tree): - return self._reconstruct_tree(node, parent_rule_name) - elif isinstance(node, Token): - return [self._reconstruct_token(node, parent_rule_name)] - else: - # Fallback: convert to string - return [str(node)] - - def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: - """Convert a Lark.Tree AST back into a string representation of HCL.""" - # Reset state - self._reset_state() - - # Reconstruct the tree - fragments = self._reconstruct_node(tree) - - # Join fragments and apply post-processing - result = "".join(fragments) - - if postproc: - result = postproc(result) - - # Ensure file ends with newline - if result and not result.endswith("\n"): - result += "\n" - - return result diff --git a/hcl2/rule_transformer/rules/__init__.py b/hcl2/rule_transformer/rules/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/hcl2/rule_transformer/__init__.py b/hcl2/rules/__init__.py similarity index 100% rename from hcl2/rule_transformer/__init__.py rename to hcl2/rules/__init__.py diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rules/abstract.py similarity index 97% rename from hcl2/rule_transformer/rules/abstract.py rename to hcl2/rules/abstract.py index e83fed2b..a494d901 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rules/abstract.py @@ -5,7 +5,7 @@ from lark.exceptions import VisitError from lark.tree import Meta -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.utils import SerializationOptions, SerializationContext class LarkElement(ABC): diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rules/base.py similarity index 88% rename from hcl2/rule_transformer/rules/base.py rename to hcl2/rules/base.py index 
c879b772..a025949a 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rules/base.py @@ -4,14 +4,14 @@ from lark.tree import Meta from hcl2.const import IS_BLOCK -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.tokens import NAME, EQ, LBRACE, RBRACE - -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.rules.abstract import LarkRule, LarkToken +from hcl2.rules.expressions import ExpressionRule, ExprTermRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.strings import StringRule +from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE + +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.utils import SerializationOptions, SerializationContext class AttributeRule(LarkRule): diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rules/containers.py similarity index 93% rename from hcl2/rule_transformer/rules/containers.py rename to hcl2/rules/containers.py index a2f53436..4d7310c8 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rules/containers.py @@ -1,14 +1,14 @@ from typing import Tuple, List, Optional, Union, Any -from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import ( +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import ( FloatLitRule, IntLitRule, IdentifierRule, ) -from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.strings import StringRule 
+from hcl2.rules.tokens import ( COLON, EQ, LBRACE, @@ -20,11 +20,11 @@ RPAR, DOT, ) -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/expressions.py b/hcl2/rules/expressions.py similarity index 95% rename from hcl2/rule_transformer/rules/expressions.py rename to hcl2/rules/expressions.py index db256e82..1e1d0cd8 100644 --- a/hcl2/rule_transformer/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -4,16 +4,16 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import ( +from hcl2.rules.abstract import ( LarkToken, ) -from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule -from hcl2.rule_transformer.rules.tokens import LPAR, RPAR, QMARK, COLON -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.literal_rules import BinaryOperatorRule +from hcl2.rules.tokens import LPAR, RPAR, QMARK, COLON +from hcl2.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( wrap_into_parentheses, to_dollar_string, SerializationOptions, diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rules/for_expressions.py similarity index 95% rename from hcl2/rule_transformer/rules/for_expressions.py rename to hcl2/rules/for_expressions.py index 3a89aba3..a1f24dcb 100644 --- a/hcl2/rule_transformer/rules/for_expressions.py +++ b/hcl2/rules/for_expressions.py @@ -2,10 +2,10 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.abstract 
import LarkRule, LarkElement +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import ( LSQB, RSQB, LBRACE, @@ -18,11 +18,11 @@ ELLIPSIS, FOR_OBJECT_ARROW, ) -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rules/functions.py similarity index 90% rename from hcl2/rule_transformer/rules/functions.py rename to hcl2/rules/functions.py index 92cc8b11..380b959b 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rules/functions.py @@ -1,14 +1,14 @@ from functools import lru_cache from typing import Any, Optional, Tuple, Union, List -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR +from hcl2.rules.whitespace import ( InlineCommentMixIn, NewLineOrCommentRule, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rules/indexing.py similarity index 94% rename from hcl2/rule_transformer/rules/indexing.py rename to hcl2/rules/indexing.py index 20decf00..fc8cbf90 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rules/indexing.py @@ -2,21 +2,21 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import LarkRule -from 
hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import ( DOT, IntLiteral, LSQB, RSQB, ATTR_SPLAT, ) -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.whitespace import ( InlineCommentMixIn, NewLineOrCommentRule, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, to_dollar_string, SerializationContext, diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rules/literal_rules.py similarity index 85% rename from hcl2/rule_transformer/rules/literal_rules.py rename to hcl2/rules/literal_rules.py index baf8546f..2e5b8281 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rules/literal_rules.py @@ -1,8 +1,8 @@ from abc import ABC from typing import Any, Tuple -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.rules.abstract import LarkRule, LarkToken +from hcl2.utils import SerializationOptions, SerializationContext class TokenRule(LarkRule, ABC): diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rules/strings.py similarity index 94% rename from hcl2/rule_transformer/rules/strings.py rename to hcl2/rules/strings.py index 4e28e976..248ab173 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rules/strings.py @@ -1,9 +1,9 @@ import sys from typing import Tuple, List, Any, Union -from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions 
import ExpressionRule +from hcl2.rules.tokens import ( INTERP_START, RBRACE, DBLQUOTE, @@ -12,7 +12,7 @@ HEREDOC_TEMPLATE, HEREDOC_TRIM_TEMPLATE, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rules/tokens.py similarity index 98% rename from hcl2/rule_transformer/rules/tokens.py rename to hcl2/rules/tokens.py index ba948d3e..b02be66e 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rules/tokens.py @@ -1,7 +1,7 @@ from functools import lru_cache from typing import Callable, Any, Type, Optional, Tuple -from hcl2.rule_transformer.rules.abstract import LarkToken +from hcl2.rules.abstract import LarkToken class StringToken(LarkToken): diff --git a/hcl2/rule_transformer/rules/tree.py b/hcl2/rules/tree.py similarity index 100% rename from hcl2/rule_transformer/rules/tree.py rename to hcl2/rules/tree.py diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rules/whitespace.py similarity index 90% rename from hcl2/rule_transformer/rules/whitespace.py rename to hcl2/rules/whitespace.py index 62069b78..5f2fa886 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -1,9 +1,9 @@ from abc import ABC from typing import Optional, List, Any, Tuple -from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule -from hcl2.rule_transformer.rules.literal_rules import TokenRule -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.rules.abstract import LarkToken, LarkRule +from hcl2.rules.literal_rules import TokenRule +from hcl2.utils import SerializationOptions, SerializationContext class NewLineOrCommentRule(TokenRule): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/transformer.py similarity index 93% rename from hcl2/rule_transformer/transformer.py rename to hcl2/transformer.py index 931eab8e..07230fe5 100644 --- 
a/hcl2/rule_transformer/transformer.py +++ b/hcl2/transformer.py @@ -2,13 +2,13 @@ from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta -from hcl2.rule_transformer.rules.base import ( +from hcl2.rules.base import ( StartRule, BodyRule, BlockRule, AttributeRule, ) -from hcl2.rule_transformer.rules.containers import ( +from hcl2.rules.containers import ( ObjectRule, ObjectElemRule, ObjectElemKeyRule, @@ -16,21 +16,21 @@ ObjectElemKeyExpressionRule, ObjectElemKeyDotAccessor, ) -from hcl2.rule_transformer.rules.expressions import ( +from hcl2.rules.expressions import ( BinaryTermRule, UnaryOpRule, BinaryOpRule, ExprTermRule, ConditionalRule, ) -from hcl2.rule_transformer.rules.for_expressions import ( +from hcl2.rules.for_expressions import ( ForTupleExprRule, ForObjectExprRule, ForIntroRule, ForCondRule, ) -from hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule -from hcl2.rule_transformer.rules.indexing import ( +from hcl2.rules.functions import ArgumentsRule, FunctionCallRule +from hcl2.rules.indexing import ( IndexExprTermRule, SqbIndexRule, ShortIndexRule, @@ -41,28 +41,28 @@ FullSplatRule, FullSplatExprTermRule, ) -from hcl2.rule_transformer.rules.literal_rules import ( +from hcl2.rules.literal_rules import ( FloatLitRule, IntLitRule, IdentifierRule, BinaryOperatorRule, KeywordRule, ) -from hcl2.rule_transformer.rules.strings import ( +from hcl2.rules.strings import ( InterpolationRule, StringRule, StringPartRule, HeredocTemplateRule, HeredocTrimTemplateRule, ) -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.tokens import ( NAME, IntLiteral, FloatLiteral, StringToken, StaticStringToken, ) -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rules.whitespace import NewLineOrCommentRule class RuleTransformer(Transformer): diff --git a/hcl2/rule_transformer/utils.py b/hcl2/utils.py similarity index 100% rename from hcl2/rule_transformer/utils.py rename to 
hcl2/utils.py From ba80334cd0ab6c567f425cd3813e5ed98132880c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 16:51:26 +0100 Subject: [PATCH 14/45] minor improvements to deserializer.py and formatter.py --- hcl2/deserializer.py | 43 +++++++++++++----------------- hcl2/formatter.py | 63 ++++++++++++++------------------------------ 2 files changed, 39 insertions(+), 67 deletions(-) diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 2290809c..d6b4d4c2 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -1,8 +1,8 @@ import json from abc import ABC, abstractmethod from dataclasses import dataclass -from functools import lru_cache -from typing import Any, TextIO, List, Union, Optional +from functools import cached_property +from typing import Any, TextIO, List, Union from regex import regex @@ -55,7 +55,6 @@ HEREDOC_TEMPLATE, COLON, ) -from hcl2.rules.whitespace import NewLineOrCommentRule from hcl2.transformer import RuleTransformer from hcl2.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN @@ -83,11 +82,8 @@ def load(self, file: TextIO) -> LarkElement: class BaseDeserializer(LarkElementTreeDeserializer): def __init__(self, options=None): super().__init__(options) - self._current_line = 1 - self._last_new_line: Optional[NewLineOrCommentRule] = None - @property - @lru_cache + @cached_property def _transformer(self) -> RuleTransformer: return RuleTransformer() @@ -119,27 +115,29 @@ def _deserialize(self, value: Any) -> LarkElement: def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: children = [] - for key, value in value.items(): - if self._is_block(value): + for key, val in value.items(): + if self._is_block(val): # this value is a list of blocks, iterate over each block and deserialize them - for block in value: + for block in val: children.append(self._deserialize_block(key, block)) else: # otherwise it's just an attribute if key != IS_BLOCK: - children.append(self._deserialize_attribute(key, value)) + 
children.append(self._deserialize_attribute(key, val)) return children def _deserialize_text(self, value: Any) -> LarkRule: - try: - int_val = int(value) - if "." in str(value): - return FloatLitRule([FloatLiteral(float(value))]) - return IntLitRule([IntLiteral(int_val)]) - except ValueError: - pass + # bool must be checked before int since bool is a subclass of int + if isinstance(value, bool): + return self._deserialize_identifier(str(value).lower()) + + if isinstance(value, float): + return FloatLitRule([FloatLiteral(value)]) + + if isinstance(value, int): + return IntLitRule([IntLiteral(value)]) if isinstance(value, str): if value.startswith('"') and value.endswith('"'): @@ -160,9 +158,6 @@ def _deserialize_text(self, value: Any) -> LarkRule: return self._deserialize_identifier(value) - elif isinstance(value, bool): - return self._deserialize_identifier(str(value).lower()) - return self._deserialize_identifier(str(value)) def _deserialize_identifier(self, value: str) -> IdentifierRule: @@ -283,8 +278,8 @@ def _deserialize_list(self, value: List) -> TupleRule: def _deserialize_object(self, value: dict) -> ObjectRule: children = [] - for key, value in value.items(): - children.append(self._deserialize_object_elem(key, value)) + for key, val in value.items(): + children.append(self._deserialize_object_elem(key, val)) if self.options.object_elements_trailing_comma: children.append(COMMA()) @@ -342,6 +337,6 @@ def _contains_block_marker(self, obj: dict) -> bool: return True if isinstance(value, list): for element in value: - if self._contains_block_marker(element): + if isinstance(element, dict) and self._contains_block_marker(element): return True return False diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 205d2ddd..35fb6b05 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -42,8 +42,7 @@ def format_tree(self, tree: LarkElement): class BaseFormatter(LarkElementTreeFormatter): def __init__(self, options: FormatterOptions = None): 
super().__init__(options) - self._current_line = 1 - self._current_indent_level = 0 + self._last_new_line: NewLineOrCommentRule = None def format_tree(self, tree: LarkElement): if isinstance(tree, StartRule): @@ -51,9 +50,6 @@ def format_tree(self, tree: LarkElement): def format_start_rule(self, rule: StartRule): self.format_body_rule(rule.body, 0) - # for child in rule.body.children: - # if isinstance(child, BlockRule): - # self.format_block_rule(child, 1) def format_block_rule(self, rule: BlockRule, indent_level: int = 0): if self.options.vertically_align_attributes: @@ -87,7 +83,8 @@ def format_body_rule(self, rule: BodyRule, indent_level: int = 0): new_children.insert(-2, self._build_newline(indent_level)) new_children.append(self._build_newline(indent_level, 2)) - new_children.pop(-1) + if new_children: + new_children.pop(-1) rule._children = new_children def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): @@ -158,7 +155,7 @@ def format_expression(self, rule: ExprTermRule, indent_level: int = 0): self.format_forobjectexpr(rule.expression, indent_level) elif isinstance(rule.expression, ExprTermRule): - self.format_expression(rule.expression) + self.format_expression(rule.expression, indent_level) def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = 0): for child in expression.children: @@ -169,7 +166,6 @@ def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = for index in indexes: expression.children[index] = self._build_newline(indent_level) self._deindent_last_line() - # expression.children[8] = self._build_newline(indent_level - 1) def format_forobjectexpr( self, expression: ForObjectExprRule, indent_level: int = 0 @@ -192,23 +188,28 @@ def _vertically_align_attributes_in_body(self, body: BodyRule): attributes_sequence.append(child) elif attributes_sequence: - max_length = max( - len(attribute.identifier.token.value) - for attribute in attributes_sequence - ) - for attribute in 
attributes_sequence: - name_length = len(attribute.identifier.token.value) - spaces_to_add = max_length - name_length - attribute.children[1].set_value( - " " * spaces_to_add + attribute.children[1].value - ) + self._align_attributes_sequence(attributes_sequence) attributes_sequence = [] + if attributes_sequence: + self._align_attributes_sequence(attributes_sequence) + + def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): + max_length = max( + len(attribute.identifier.token.value) + for attribute in attributes_sequence + ) + for attribute in attributes_sequence: + name_length = len(attribute.identifier.token.value) + spaces_to_add = max_length - name_length + attribute.children[1].set_value( + " " * spaces_to_add + attribute.children[1].value + ) + def _vertically_align_object_elems(self, rule: ObjectRule): max_length = max(len(elem.key.serialize()) for elem in rule.elements) for elem in rule.elements: key_length = len(elem.key.serialize()) - print(elem.key.serialize(), key_length) spaces_to_add = max_length - key_length @@ -218,17 +219,6 @@ def _vertically_align_object_elems(self, rule: ObjectRule): elem.children[1].set_value(" " * spaces_to_add + separator.value) - def _move_to_next_line(self, times: int = 1): - self._current_line += times - - def _increase_indent_level(self, times: int = 1): - self._current_indent_level += times - - def _decrease_indent_level(self, times: int = 1): - self._current_indent_level -= times - if self._current_indent_level < 0: - self._current_indent_level = 0 - def _build_newline( self, next_line_indent: int = 0, count: int = 1 ) -> NewLineOrCommentRule: @@ -247,16 +237,3 @@ def _deindent_last_line(self, times: int = 1): for i in range(times): if token.value.endswith(" " * self.options.indent_length): token.set_value(token.value[: -self.options.indent_length]) - - # def _build_meta(self, indent_level: int = 0, length: int = 0) -> Meta: - # result = Meta() - # result.empty = length == 0 - # result.line = 
self._current_line - # result.column = indent_level * self.options.indent_length - # # result.start_pos = - # # result.end_line = - # # result.end_column = - # # result.end_pos = - # # result.orig_expansion = - # # result.match_tree = - # return result From e32d3e3028b3f808c9c3f865135bffe25aaa1b5c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 17:50:06 +0100 Subject: [PATCH 15/45] add round-trip test suite --- test/round_trip/__init__.py | 0 .../hcl2_original/operator_precedence.tf | 15 ++ test/round_trip/hcl2_original/smoke.tf | 72 ++++++ .../hcl2_reconstructed/operator_precedence.tf | 15 ++ test/round_trip/hcl2_reconstructed/smoke.tf | 64 +++++ .../operator_precedence.json | 20 ++ test/round_trip/json_reserialized/smoke.json | 70 ++++++ .../json_serialized/operator_precedence.json | 20 ++ test/round_trip/json_serialized/smoke.json | 70 ++++++ .../special/operator_precedence.json | 20 ++ test/round_trip/test_round_trip.py | 224 ++++++++++++++++++ test/unit/__init__.py | 1 - test/unit/test_builder.py | 110 --------- test/unit/test_dict_transformer.py | 32 --- test/unit/test_hcl2_syntax.py | 193 --------------- test/unit/test_load.py | 57 ----- test/unit/test_load_with_meta.py | 23 -- test/unit/test_reconstruct_ast.py | 112 --------- test/unit/test_reconstruct_dict.py | 88 ------- 19 files changed, 590 insertions(+), 616 deletions(-) create mode 100644 test/round_trip/__init__.py create mode 100644 test/round_trip/hcl2_original/operator_precedence.tf create mode 100644 test/round_trip/hcl2_original/smoke.tf create mode 100644 test/round_trip/hcl2_reconstructed/operator_precedence.tf create mode 100644 test/round_trip/hcl2_reconstructed/smoke.tf create mode 100644 test/round_trip/json_reserialized/operator_precedence.json create mode 100644 test/round_trip/json_reserialized/smoke.json create mode 100644 test/round_trip/json_serialized/operator_precedence.json create mode 100644 test/round_trip/json_serialized/smoke.json create mode 100644 
test/round_trip/special/operator_precedence.json create mode 100644 test/round_trip/test_round_trip.py delete mode 100644 test/unit/__init__.py delete mode 100644 test/unit/test_builder.py delete mode 100644 test/unit/test_dict_transformer.py delete mode 100644 test/unit/test_hcl2_syntax.py delete mode 100644 test/unit/test_load.py delete mode 100644 test/unit/test_load_with_meta.py delete mode 100644 test/unit/test_reconstruct_ast.py delete mode 100644 test/unit/test_reconstruct_dict.py diff --git a/test/round_trip/__init__.py b/test/round_trip/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/round_trip/hcl2_original/operator_precedence.tf b/test/round_trip/hcl2_original/operator_precedence.tf new file mode 100644 index 00000000..f8351161 --- /dev/null +++ b/test/round_trip/hcl2_original/operator_precedence.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/round_trip/hcl2_original/smoke.tf b/test/round_trip/hcl2_original/smoke.tf new file mode 100644 index 00000000..d741a6ac --- /dev/null +++ b/test/round_trip/hcl2_original/smoke.tf @@ -0,0 +1,72 @@ + +block label1 label2 { + a = 5 + b = 1256.5 + c = 15 + (10 * 12) + d = (- a) + e = ( + a == b + ? true : false + ) + f = "${"this is a string"}" + g = 1 == 2 + h = { + k1 = 5, + k2 = 10 + , + "k3" = {k4 = "a"} + (5 + 5) = "d" + k5.attr.attr = "e" + } + i = [ + a, b + , + "c${aaa}", + d, + [1, 2, 3,], + f(a), + provider::func::aa(5) + + ] + j = func( + a, b + , c, + d ... 
+ + ) + k = a.b.5 + l = a.*.b + m = a[*][c].a.*.1 + + block b1 { + a = 1 + } +} + +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" + escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" +} + + +block { + route53_forwarding_rule_shares = { + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + "${forwarding_rule_key}" => { + aws_account_ids = [ + for account_name in var.route53_resolver_forwarding_rule_shares[ + forwarding_rule_key + ].aws_account_names : + module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] + ] + } + ... + if + substr(bucket_name, 0, 1) == "l" + } +} diff --git a/test/round_trip/hcl2_reconstructed/operator_precedence.tf b/test/round_trip/hcl2_reconstructed/operator_precedence.tf new file mode 100644 index 00000000..323759aa --- /dev/null +++ b/test/round_trip/hcl2_reconstructed/operator_precedence.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? 
d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/round_trip/hcl2_reconstructed/smoke.tf b/test/round_trip/hcl2_reconstructed/smoke.tf new file mode 100644 index 00000000..b5c54e96 --- /dev/null +++ b/test/round_trip/hcl2_reconstructed/smoke.tf @@ -0,0 +1,64 @@ +block label1 label2 { + a = 5 + b = 1256.5 + c = 15 + (10 * 12) + d = (-a) + e = (a == b ? true : false) + f = "${"this is a string"}" + g = 1 == 2 + h = { + k1 = 5, + k2 = 10, + "k3" = { + k4 = "a", + }, + (5 + 5) = "d", + k5.attr.attr = "e", + } + i = [ + a, + b, + "c${aaa}", + d, + [ + 1, + 2, + 3, + ], + f(a), + provider::func::aa(), + ] + j = func(a, b, c, d) + k = a.b.5 + l = a.*.b + m = a[*][c].a.*.1 + + block b1 { + a = 1 + } +} + + +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" + escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" +} + + +block { + route53_forwarding_rule_shares = { + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + "${forwarding_rule_key}" => { + aws_account_ids = [ + for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : + module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] + + ] + } ... 
if substr(bucket_name, 0, 1) == "l" + } +} diff --git a/test/round_trip/json_reserialized/operator_precedence.json b/test/round_trip/json_reserialized/operator_precedence.json new file mode 100644 index 00000000..5c611ea7 --- /dev/null +++ b/test/round_trip/json_reserialized/operator_precedence.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${var.env == \"prod\" && var.debug}", + "and_before_ternary": "${true && true ? 1 : 0}", + "mixed_arith_cmp": "${var.a + var.b * var.c > 10}", + "full_chain": "${a + b == c && d || e}", + "left_assoc_sub": "${a - b - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? d : e}", + "unary_precedence": "${!a && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/json_reserialized/smoke.json b/test/round_trip/json_reserialized/smoke.json new file mode 100644 index 00000000..48544f85 --- /dev/null +++ b/test/round_trip/json_reserialized/smoke.json @@ -0,0 +1,70 @@ +{ + "block": [ + { + "label1": { + "label2": { + "a": 5, + "b": 1256.5, + "c": "${15 + (10 * 12)}", + "d": "${(-a)}", + "e": "${(a == b ? 
true : false)}", + "f": "\"${\"this is a string\"}\"", + "g": "${1 == 2}", + "h": { + "k1": 5, + "k2": 10, + "\"k3\"": { + "k4": "\"a\"" + }, + "${(5 + 5)}": "\"d\"", + "k5.attr.attr": "\"e\"" + }, + "i": [ + "a", + "b", + "\"c${aaa}\"", + "d", + [ + 1, + 2, + 3 + ], + "${f(a)}", + "${provider::func::aa()}" + ], + "j": "${func(a, b, c, d)}", + "k": "${a.b.5}", + "l": "${a.*.b}", + "m": "${a[*][c].a.*.1}", + "block": [ + { + "b1": { + "a": 1, + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + }, + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + }, + { + "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... 
if substr(bucket_name, 0, 1) == \"l\"}}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/json_serialized/operator_precedence.json b/test/round_trip/json_serialized/operator_precedence.json new file mode 100644 index 00000000..5c611ea7 --- /dev/null +++ b/test/round_trip/json_serialized/operator_precedence.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${var.env == \"prod\" && var.debug}", + "and_before_ternary": "${true && true ? 1 : 0}", + "mixed_arith_cmp": "${var.a + var.b * var.c > 10}", + "full_chain": "${a + b == c && d || e}", + "left_assoc_sub": "${a - b - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? d : e}", + "unary_precedence": "${!a && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/json_serialized/smoke.json b/test/round_trip/json_serialized/smoke.json new file mode 100644 index 00000000..48544f85 --- /dev/null +++ b/test/round_trip/json_serialized/smoke.json @@ -0,0 +1,70 @@ +{ + "block": [ + { + "label1": { + "label2": { + "a": 5, + "b": 1256.5, + "c": "${15 + (10 * 12)}", + "d": "${(-a)}", + "e": "${(a == b ? 
true : false)}", + "f": "\"${\"this is a string\"}\"", + "g": "${1 == 2}", + "h": { + "k1": 5, + "k2": 10, + "\"k3\"": { + "k4": "\"a\"" + }, + "${(5 + 5)}": "\"d\"", + "k5.attr.attr": "\"e\"" + }, + "i": [ + "a", + "b", + "\"c${aaa}\"", + "d", + [ + 1, + 2, + 3 + ], + "${f(a)}", + "${provider::func::aa()}" + ], + "j": "${func(a, b, c, d)}", + "k": "${a.b.5}", + "l": "${a.*.b}", + "m": "${a[*][c].a.*.1}", + "block": [ + { + "b1": { + "a": 1, + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + }, + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + }, + { + "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/special/operator_precedence.json b/test/round_trip/special/operator_precedence.json new file mode 100644 index 00000000..35adb5bb --- /dev/null +++ b/test/round_trip/special/operator_precedence.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${(var.env == \"prod\") && var.debug}", + "and_before_ternary": "${(true && true) ? 
1 : 0}", + "mixed_arith_cmp": "${(var.a + (var.b * var.c)) > 10}", + "full_chain": "${(((a + b) == c) && d) || e}", + "left_assoc_sub": "${(a - b) - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? d : e}", + "unary_precedence": "${(!a) && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/test_round_trip.py b/test/round_trip/test_round_trip.py new file mode 100644 index 00000000..b43340b6 --- /dev/null +++ b/test/round_trip/test_round_trip.py @@ -0,0 +1,224 @@ +"""Round-trip tests for the HCL2 → JSON → HCL2 pipeline. + +Every test starts from the source HCL files in test/round_trip/hcl2/ and +runs the pipeline forward from there, comparing actuals against expected +outputs at each stage: + +1. HCL → JSON serialization (parse + transform + serialize) +2. JSON → JSON reserialization (serialize + deserialize + reserialize) +3. JSON → HCL reconstruction (serialize + deserialize + format + reconstruct) +4. 
Full round-trip (HCL → JSON → HCL → JSON produces identical JSON) +""" + +import json +from enum import Enum +from pathlib import Path +from typing import List +from unittest import TestCase + +from hcl2 import parses +from hcl2.deserializer import BaseDeserializer +from hcl2.formatter import BaseFormatter +from hcl2.reconstructor import HCLReconstructor +from hcl2.transformer import RuleTransformer +from hcl2.utils import SerializationOptions + +ROUND_TRIP_DIR = Path(__file__).absolute().parent +HCL2_ORIGINAL_DIR = ROUND_TRIP_DIR / "hcl2_original" + +_STEP_DIRS = { + "hcl2_original": HCL2_ORIGINAL_DIR, + "hcl2_reconstructed": ROUND_TRIP_DIR / "hcl2_reconstructed", + "json_serialized": ROUND_TRIP_DIR / "json_serialized", + "json_reserialized": ROUND_TRIP_DIR / "json_reserialized", + "json_operator_precedence": ROUND_TRIP_DIR / "json_operator_precedence", +} + +_STEP_SUFFIXES = { + "hcl2_original": ".tf", + "hcl2_reconstructed": ".tf", + "json_serialized": ".json", + "json_reserialized": ".json", + "json_operator_precedence": ".json", +} + + +class SuiteStep(Enum): + ORIGINAL = "hcl2_original" + RECONSTRUCTED = "hcl2_reconstructed" + JSON_SERIALIZED = "json_serialized" + JSON_RESERIALIZED = "json_reserialized" + JSON_OPERATOR_PRECEDENCE = "json_operator_precedence" + + +def _get_suites() -> List[str]: + """ + Get a list of the test suites. + Names of a test suite is a name of file in `test/round_trip/hcl2_original/` without the .tf suffix. + + Override SUITES to run a specific subset, e.g. SUITES = ["config"] + """ + return SUITES or sorted( + file.stem for file in HCL2_ORIGINAL_DIR.iterdir() if file.is_file() + ) + + +# set this to arbitrary list of test suites to run, +# e.g. 
`SUITES = ["smoke"]` to run the tests only for `test/round_trip/hcl2_original/smoke.tf` +SUITES: List[str] = [] + + +def _get_suite_file(suite_name: str, step: SuiteStep) -> Path: + """Return the path for a given suite name and pipeline step.""" + return _STEP_DIRS[step.value] / (suite_name + _STEP_SUFFIXES[step.value]) + + +def _parse_and_serialize(hcl_text: str, options: SerializationOptions = None) -> dict: + """Parse HCL text and serialize to a Python dict.""" + parsed_tree = parses(hcl_text) + rules = RuleTransformer().transform(parsed_tree) + if options: + return rules.serialize(options=options) + return rules.serialize() + + +def _deserialize_and_reserialize(serialized: dict) -> dict: + """Deserialize a Python dict back through the rule tree and reserialize.""" + deserializer = BaseDeserializer() + formatter = BaseFormatter() + deserialized = deserializer.load_python(serialized) + formatter.format_tree(deserialized) + return deserialized.serialize() + + +def _deserialize_and_reconstruct(serialized: dict) -> str: + """Deserialize a Python dict and reconstruct HCL text.""" + deserializer = BaseDeserializer() + formatter = BaseFormatter() + reconstructor = HCLReconstructor() + deserialized = deserializer.load_python(serialized) + formatter.format_tree(deserialized) + lark_tree = deserialized.to_lark() + return reconstructor.reconstruct(lark_tree) + + +class TestRoundTripSerialization(TestCase): + """Test HCL2 → JSON serialization: parse HCL, transform, serialize, compare with expected JSON.""" + + maxDiff = None + + def test_hcl_to_json(self): + for suite in _get_suites(): + yield self.check_hcl_to_json, suite + + def check_hcl_to_json(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_path = _get_suite_file(suite, SuiteStep.JSON_SERIALIZED) + + actual = _parse_and_serialize(hcl_path.read_text()) + expected = json.loads(json_path.read_text()) + + self.assertEqual( + actual, + expected, + f"HCL → JSON serialization mismatch for 
{suite}", + ) + + +class TestRoundTripReserialization(TestCase): + """Test JSON → JSON reserialization: parse HCL, serialize, deserialize, reserialize, compare with expected.""" + + maxDiff = None + + def test_json_reserialization(self): + for suite in _get_suites(): + yield self.check_json_reserialization, suite + + def check_json_reserialization(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) + + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reserialize(serialized) + + expected = json.loads(json_reserialized_path.read_text()) + self.assertEqual( + actual, + expected, + f"JSON reserialization mismatch for {suite}", + ) + + +class TestRoundTripReconstruction(TestCase): + """Test JSON → HCL reconstruction: parse HCL, serialize, deserialize, format, reconstruct, compare with expected HCL.""" + + maxDiff = None + + def test_json_to_hcl(self): + for suite in _get_suites(): + yield self.check_json_to_hcl, suite + + def check_json_to_hcl(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + hcl_reconstructed_path = _get_suite_file(suite, SuiteStep.RECONSTRUCTED) + + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reconstruct(serialized) + + expected = hcl_reconstructed_path.read_text() + self.assertMultiLineEqual( + actual, + expected, + f"HCL reconstruction mismatch for {suite}", + ) + + +class TestRoundTripFull(TestCase): + """Test full round-trip: HCL → JSON → HCL → JSON should produce matching JSON.""" + + maxDiff = None + + def test_full_round_trip(self): + for suite in _get_suites(): + yield self.check_full_round_trip, suite + + def check_full_round_trip(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + original_hcl = hcl_path.read_text() + + # Forward: HCL → JSON + serialized = _parse_and_serialize(original_hcl) + + # Reconstruct: JSON → 
HCL + reconstructed_hcl = _deserialize_and_reconstruct(serialized) + + # Re-parse: reconstructed HCL → JSON + reserialized = _parse_and_serialize(reconstructed_hcl) + + self.assertEqual( + reserialized, + serialized, + f"Full round-trip mismatch for {suite}: " + f"HCL → JSON → HCL → JSON did not produce identical JSON", + ) + + +class TestOperatorPrecedence(TestCase): + """Test that parsed expressions correctly represent operator precedence. + + Serializes with force_operation_parentheses=True so that implicit + precedence becomes explicit parentheses in the output. + See: https://github.com/amplify-education/python-hcl2/issues/248 + """ + + maxDiff = None + _OPTIONS = SerializationOptions(force_operation_parentheses=True) + + def test_operator_precedence(self): + hcl_path = _get_suite_file("operator_precedence", SuiteStep.ORIGINAL) + json_path = SPECIAL_DIR / "operator_precedence.json" + + actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) + expected = json.loads(json_path.read_text()) + + self.assertEqual(actual, expected) diff --git a/test/unit/__init__.py b/test/unit/__init__.py deleted file mode 100644 index c497b297..00000000 --- a/test/unit/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Unit tests -- tests that verify the code of this egg in isolation""" diff --git a/test/unit/test_builder.py b/test/unit/test_builder.py deleted file mode 100644 index 2ce0cfed..00000000 --- a/test/unit/test_builder.py +++ /dev/null @@ -1,110 +0,0 @@ -# pylint:disable=C0116 - -"""Test building an HCL file from scratch""" - -from pathlib import Path -from unittest import TestCase - -import hcl2 -import hcl2.builder - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -JSON_DIR = HELPERS_DIR / "terraform-config-json" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] - - -class TestBuilder(TestCase): - """Test building a variety of hcl files""" - - # print any 
differences fully to the console - maxDiff = None - - def test_build_blocks_tf(self): - nested_builder = hcl2.Builder() - nested_builder.block("nested_block_1", ["a"], foo="bar") - nested_builder.block("nested_block_1", ["a", "b"], bar="foo") - nested_builder.block("nested_block_1", foobar="barfoo") - nested_builder.block("nested_block_2", barfoo="foobar") - - builder = hcl2.Builder() - builder.block("block", a=1) - builder.block("block", ["label"], __nested_builder__=nested_builder, b=2) - - self.compare_filenames(builder, "blocks.tf") - - def test_build_escapes_tf(self): - builder = hcl2.Builder() - - builder.block("block", ["block_with_newlines"], a="line1\nline2") - - self.compare_filenames(builder, "escapes.tf") - - def test_locals_embdedded_condition_tf(self): - builder = hcl2.Builder() - - builder.block( - "locals", - terraform={ - "channels": "${(local.running_in_ci ? local.ci_channels : local.local_channels)}", - "authentication": [], - "foo": None, - }, - ) - - self.compare_filenames(builder, "locals_embedded_condition.tf") - - def test_locals_embedded_function_tf(self): - builder = hcl2.Builder() - - function_test = ( - "${var.basename}-${var.forwarder_function_name}_" - '${md5("${var.vpc_id}${data.aws_region.current.name}")}' - ) - builder.block("locals", function_test=function_test) - - self.compare_filenames(builder, "locals_embedded_function.tf") - - def test_locals_embedded_interpolation_tf(self): - builder = hcl2.Builder() - - attributes = { - "simple_interpolation": "prefix:${var.foo}-suffix", - "embedded_interpolation": "(long substring without interpolation); " - '${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo', - "deeply_nested_interpolation": 'prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}', - "escaped_interpolation": "prefix:$${aws:username}-suffix", - "simple_and_escaped": '${"bar"}$${baz:bat}', - "simple_and_escaped_reversed": '$${baz:bat}${"bar"}', - "nested_escaped": 'bar-${"$${baz:bat}"}', - } 
- - builder.block("locals", **attributes) - - self.compare_filenames(builder, "string_interpolations.tf") - - def test_provider_function_tf(self): - builder = hcl2.Builder() - - builder.block( - "locals", - name2='${provider::test2::test("a")}', - name3='${test("a")}', - ) - - self.compare_filenames(builder, "provider_function.tf") - - def compare_filenames(self, builder: hcl2.Builder, filename: str): - hcl_dict = builder.build() - hcl_ast = hcl2.reverse_transform(hcl_dict) - hcl_content_built = hcl2.writes(hcl_ast) - - hcl_path = (HCL2_DIR / filename).absolute() - with hcl_path.open("r") as hcl_file: - hcl_file_content = hcl_file.read() - self.assertMultiLineEqual( - hcl_content_built, - hcl_file_content, - f"file {filename} does not match its programmatically built version.", - ) diff --git a/test/unit/test_dict_transformer.py b/test/unit/test_dict_transformer.py deleted file mode 100644 index baad5ba9..00000000 --- a/test/unit/test_dict_transformer.py +++ /dev/null @@ -1,32 +0,0 @@ -# pylint:disable=C0114,C0116,C0103,W0612 - -from unittest import TestCase - -from hcl2.dict_transformer import DictTransformer - - -class TestDictTransformer(TestCase): - """Test behaviour of hcl2.transformer.DictTransformer class""" - - @staticmethod - def build_dict_transformer(with_meta: bool = False) -> DictTransformer: - return DictTransformer(with_meta) - - def test_to_string_dollar(self): - string_values = { - '"bool"': "bool", - '"number"': "number", - '"string"': "string", - "${value_1}": "${value_1}", - '"value_2': '${"value_2}', - 'value_3"': '${value_3"}', - '"value_4"': "value_4", - "value_5": "${value_5}", - } - - dict_transformer = self.build_dict_transformer() - - for value, expected in string_values.items(): - actual = dict_transformer.to_string_dollar(value) - - self.assertEqual(actual, expected) diff --git a/test/unit/test_hcl2_syntax.py b/test/unit/test_hcl2_syntax.py deleted file mode 100644 index 96113df3..00000000 --- a/test/unit/test_hcl2_syntax.py +++ 
/dev/null @@ -1,193 +0,0 @@ -# pylint:disable=C0114,C0116,C0103,W0612 - -import string # pylint:disable=W4901 # https://stackoverflow.com/a/16651393 -from unittest import TestCase - -from test.helpers.hcl2_helper import Hcl2Helper - -from lark import UnexpectedToken, UnexpectedCharacters - - -class TestHcl2Syntax(Hcl2Helper, TestCase): - """Test parsing individual elements of HCL2 syntax""" - - def test_argument(self): - syntax = self.build_argument("identifier", '"expression"') - result = self.load_to_dict(syntax) - self.assertDictEqual(result, {"identifier": "expression"}) - - def test_identifier_starts_with_digit(self): - for i in range(0, 10): - argument = self.build_argument(f"{i}id") - with self.assertRaises(UnexpectedToken) as e: - self.load_to_dict(argument) - assert ( - f"Unexpected token Token('DECIMAL', '{i}') at line 1, column 1" - in str(e) - ) - - def test_identifier_starts_with_special_chars(self): - chars = string.punctuation.replace("_", "") - for i in chars: - argument = self.build_argument(f"{i}id") - with self.assertRaises((UnexpectedToken, UnexpectedCharacters)) as e: - self.load_to_dict(argument) - - def test_identifier_contains_special_chars(self): - chars = string.punctuation.replace("_", "").replace("-", "") - for i in chars: - argument = self.build_argument(f"identifier{i}") - with self.assertRaises((UnexpectedToken, UnexpectedCharacters)) as e: - self.load_to_dict(argument) - - def test_identifier(self): - argument = self.build_argument("_-__identifier_-1234567890-_") - self.load_to_dict(argument) - - def test_block_no_labels(self): - block = """ - block { - } - """ - result = self.load_to_dict(block) - self.assertDictEqual(result, {"block": [{}]}) - - def test_block_single_label(self): - block = """ - block "label" { - } - """ - result = self.load_to_dict(block) - self.assertDictEqual(result, {"block": [{"label": {}}]}) - - def test_block_multiple_labels(self): - block = """ - block "label1" "label2" "label3" { - } - """ - result = 
self.load_to_dict(block) - self.assertDictEqual( - result, {"block": [{"label1": {"label2": {"label3": {}}}}]} - ) - - def test_unary_operation(self): - operations = [ - ("identifier = -10", {"identifier": -10}), - ("identifier = !true", {"identifier": "${!true}"}), - ] - for hcl, dict_ in operations: - result = self.load_to_dict(hcl) - self.assertDictEqual(result, dict_) - - def test_tuple(self): - tuple_ = """tuple = [ - identifier, - "string", 100, - true == false, - 5 + 5, function(), - ]""" - result = self.load_to_dict(tuple_) - self.assertDictEqual( - result, - { - "tuple": [ - "${identifier}", - "string", - 100, - "${true == false}", - "${5 + 5}", - "${function()}", - ] - }, - ) - - def test_object(self): - object_ = """object = { - key1: identifier, key2: "string", key3: 100, - key4: true == false // comment - key5: 5 + 5, key6: function(), - key7: value == null ? 1 : 0 - }""" - result = self.load_to_dict(object_) - self.assertDictEqual( - result, - { - "object": { - "key1": "${identifier}", - "key2": "string", - "key3": 100, - "key4": "${true == false}", - "key5": "${5 + 5}", - "key6": "${function()}", - "key7": "${value == null ? 
1 : 0}", - } - }, - ) - - def test_function_call_and_arguments(self): - calls = { - "r = function()": {"r": "${function()}"}, - "r = function(arg1, arg2)": {"r": "${function(arg1, arg2)}"}, - """r = function( - arg1, arg2, - arg3, - ) - """: { - "r": "${function(arg1, arg2, arg3)}" - }, - } - - for call, expected in calls.items(): - result = self.load_to_dict(call) - self.assertDictEqual(result, expected) - - def test_index(self): - indexes = { - "r = identifier[10]": {"r": "${identifier[10]}"}, - "r = identifier.20": { - "r": "${identifier[2]}" - }, # TODO debug why `20` is parsed to `2` - """r = identifier["key"]""": {"r": '${identifier["key"]}'}, - """r = identifier.key""": {"r": "${identifier.key}"}, - } - for call, expected in indexes.items(): - result = self.load_to_dict(call) - self.assertDictEqual(result, expected) - - def test_e_notation(self): - literals = { - "var = 3e4": {"var": "${3e4}"}, - "var = 3.5e5": {"var": "${3.5e5}"}, - "var = -3e6": {"var": "${-3e6}"}, - "var = -2.3e4": {"var": "${-2.3e4}"}, - "var = -5e-2": {"var": "${-5e-2}"}, - "var = -6.1e-3": {"var": "${-6.1e-3}"}, - } - for actual, expected in literals.items(): - result = self.load_to_dict(actual) - self.assertDictEqual(result, expected) - - def test_null(self): - identifier = "var = null" - - expected = {"var": None} - - result = self.load_to_dict(identifier) - self.assertDictEqual(result, expected) - - def test_expr_term_parenthesis(self): - literals = { - "a = 1 * 2 + 3": {"a": "${1 * 2 + 3}"}, - "b = 1 * (2 + 3)": {"b": "${1 * (2 + 3)}"}, - "c = (1 * (2 + 3))": {"c": "${(1 * (2 + 3))}"}, - "conditional = value == null ? 1 : 0": { - "conditional": "${value == null ? 1 : 0}" - }, - "conditional = (value == null ? 1 : 0)": { - "conditional": "${(value == null ? 
1 : 0)}" - }, - } - - for actual, expected in literals.items(): - result = self.load_to_dict(actual) - self.assertDictEqual(result, expected) diff --git a/test/unit/test_load.py b/test/unit/test_load.py deleted file mode 100644 index f9be8845..00000000 --- a/test/unit/test_load.py +++ /dev/null @@ -1,57 +0,0 @@ -""" Test parsing a variety of hcl files""" - -import json -from pathlib import Path -from unittest import TestCase - -from hcl2.parser import PARSER_FILE, parser -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -JSON_DIR = HELPERS_DIR / "terraform-config-json" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] - - -class TestLoad(TestCase): - """Test parsing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_load_terraform(self): - """Test parsing a set of hcl2 files and force recreating the parser file""" - - # create a parser to make sure that the parser file is created - parser() - - # delete the parser file to force it to be recreated - PARSER_FILE.unlink() - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def test_load_terraform_from_cache(self): - """Test parsing a set of hcl2 files from a cached parser file""" - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def check_terraform(self, hcl_path_str: str): - """Loads a single hcl2 file, parses it and compares with the expected json""" - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - if not json_path.exists(): - assert ( - False - ), f"Expected json equivalent of the hcl file doesn't exist {json_path}" - - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - try: - hcl2_dict = hcl2.load(hcl_file) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - 
json_dict = json.load(json_file) - self.assertDictEqual( - hcl2_dict, json_dict, f"\n\nfailed comparing {hcl_path_str}" - ) diff --git a/test/unit/test_load_with_meta.py b/test/unit/test_load_with_meta.py deleted file mode 100644 index b081844e..00000000 --- a/test/unit/test_load_with_meta.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Test parsing hcl files with meta parameters""" - -import json -from pathlib import Path -from unittest import TestCase - -import hcl2 - -TEST_WITH_META_DIR = Path(__file__).absolute().parent.parent / "helpers" / "with-meta" -TF_FILE_PATH = TEST_WITH_META_DIR / "data_sources.tf" -JSON_FILE_PATH = TEST_WITH_META_DIR / "data_sources.json" - - -class TestLoadWithMeta(TestCase): - """Test parsing hcl files with meta parameters""" - - def test_load_terraform_meta(self): - """Test load() with with_meta flag set to true.""" - with TF_FILE_PATH.open("r") as tf_file, JSON_FILE_PATH.open("r") as json_file: - self.assertDictEqual( - json.load(json_file), - hcl2.load(tf_file, with_meta=True), - ) diff --git a/test/unit/test_reconstruct_ast.py b/test/unit/test_reconstruct_ast.py deleted file mode 100644 index b9545def..00000000 --- a/test/unit/test_reconstruct_ast.py +++ /dev/null @@ -1,112 +0,0 @@ -""" Test reconstructing hcl files""" - -import json -from pathlib import Path -from unittest import TestCase - -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] -JSON_DIR = HELPERS_DIR / "terraform-config-json" - - -class TestReconstruct(TestCase): - """Test reconstructing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_write_terraform(self): - """Test reconstructing a set of hcl2 files, to make sure they parse to the same structure""" - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def test_write_terraform_exact(self): - """ 
- Test reconstructing a set of hcl2 files, to make sure they - reconstruct exactly the same, including whitespace. - """ - - # the reconstruction process is not precise, so some files do not - # reconstruct their whitespace exactly the same, but they are - # syntactically equivalent. This list is a target for further - # improvements to the whitespace handling of the reconstruction - # algorithm. - inexact_files = [ - # the reconstructor loses commas on the last element in an array, - # even if they're in the input file - "iam.tf", - "variables.tf", - # the reconstructor doesn't preserve indentation within comments - # perfectly - "multiline_expressions.tf", - # the reconstructor doesn't preserve the line that a ternary is - # broken on. - "route_table.tf", - ] - - for hcl_path in HCL2_FILES: - if hcl_path not in inexact_files: - yield self.check_whitespace, hcl_path - - def check_terraform(self, hcl_path_str: str): - """ - Loads a single hcl2 file, parses it, reconstructs it, - parses the reconstructed file, and compares with the expected json - """ - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - hcl_file_content = hcl_file.read() - try: - hcl_ast = hcl2.parses(hcl_file_content) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - assert ( - False - ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}" - - try: - hcl2_dict = hcl2.loads(hcl_reconstructed) - except Exception as exc: - assert ( - False - ), f"failed to tokenize terraform in file reconstructed from `{hcl_path_str}`: {exc}" - - json_dict = json.load(json_file) - self.assertDictEqual( - hcl2_dict, - json_dict, - f"failed comparing {hcl_path_str} with reconstructed version", - ) - - def check_whitespace(self, 
hcl_path_str: str): - """Tests that the reconstructed file matches the original file exactly.""" - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - with hcl_path.open("r") as hcl_file: - hcl_file_content = hcl_file.read() - try: - hcl_ast = hcl2.parses(hcl_file_content) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - assert ( - False - ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}" - - self.assertMultiLineEqual( - hcl_reconstructed, - hcl_file_content, - f"file {hcl_path_str} does not match its reconstructed version \ - exactly. this is usually whitespace related.", - ) diff --git a/test/unit/test_reconstruct_dict.py b/test/unit/test_reconstruct_dict.py deleted file mode 100644 index a65e8429..00000000 --- a/test/unit/test_reconstruct_dict.py +++ /dev/null @@ -1,88 +0,0 @@ -""" Test reconstructing hcl files""" - -import json -import traceback -from pathlib import Path -from unittest import TestCase - -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] -JSON_DIR = HELPERS_DIR / "terraform-config-json" - - -class TestReconstruct(TestCase): - """Test reconstructing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_write_terraform(self): - """Test reconstructing a set of hcl2 files, to make sure they parse to the same structure""" - - # the reconstruction process is not precise, so some files do not - # reconstruct any embedded HCL expressions exactly the same. 
this - # list captures those, and should be manually inspected regularly to - # ensure that files remain syntactically equivalent - inexact_files = [ - # one level of interpolation is stripped from this file during - # reconstruction, since we don't have a way to distinguish it from - # a complex HCL expression. the output parses to the same value - # though - "multi_level_interpolation.tf", - ] - - for hcl_path in HCL2_FILES: - if hcl_path not in inexact_files: - yield self.check_terraform, hcl_path - - def check_terraform(self, hcl_path_str: str): - """ - Loads a single hcl2 file, parses it, reconstructs it, - parses the reconstructed file, and compares with the expected json - """ - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - try: - hcl2_dict_correct = hcl2.load(hcl_file) - except Exception as exc: - raise RuntimeError( - f"failed to tokenize 'correct' terraform in " - f"`{hcl_path_str}`: {traceback.format_exc()}" - ) from exc - - json_dict = json.load(json_file) - - try: - hcl_ast = hcl2.reverse_transform(json_dict) - except Exception as exc: - raise RuntimeError( - f"failed to reverse transform HCL from " - f"`{json_path.name}`: {traceback.format_exc()}" - ) from exc - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - raise RuntimeError( - f"failed to reconstruct terraform from AST from " - f"`{json_path.name}`: {traceback.format_exc()}" - ) from exc - - try: - hcl2_dict_reconstructed = hcl2.loads(hcl_reconstructed) - except Exception as exc: - raise RuntimeError( - f"failed to tokenize 'reconstructed' terraform from AST from " - f"`{json_path.name}`: {exc}, \n{hcl_reconstructed}" - ) from exc - - self.assertDictEqual( - hcl2_dict_reconstructed, - hcl2_dict_correct, - f"failed comparing {hcl_path_str} with reconstructed version from {json_path.name}", - ) From 
e32a5407f3cf4e0e052dfd10456a031ba7b4816c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:14:38 +0100 Subject: [PATCH 16/45] removed old unused file --- hcl2/rules/tree.py | 106 --------------------------------------------- 1 file changed, 106 deletions(-) delete mode 100644 hcl2/rules/tree.py diff --git a/hcl2/rules/tree.py b/hcl2/rules/tree.py deleted file mode 100644 index e39d2077..00000000 --- a/hcl2/rules/tree.py +++ /dev/null @@ -1,106 +0,0 @@ -from abc import ABC, abstractmethod -from typing import List, Optional, Any, Union - - -class LarkNode(ABC): - """Base class for all nodes in the tree""" - - def __init__(self, index: int = -1, parent: Optional["Node"] = None): - self._index = index - self._parent = parent - - @property - def parent(self) -> Optional["Node"]: - return self._parent - - @property - def index(self) -> int: - return self._index - - def set_parent(self, parent: "Node"): - self._parent = parent - - def set_index(self, index: int): - self._index = index - - @abstractmethod - def serialize(self, options=None) -> Any: - pass - - @abstractmethod - def to_lark(self) -> Any: - """Convert back to Lark representation""" - pass - - def is_leaf(self) -> bool: - """Check if this is a leaf node (atomic token)""" - return isinstance(self, LeafNode) - - def is_sequence(self) -> bool: - """Check if this is a token sequence node""" - return isinstance(self, SequenceNode) - - def is_internal(self) -> bool: - """Check if this is an internal node (grammar rule)""" - return isinstance(self, InternalNode) - - def is_atomic(self) -> bool: - """Check if this represents an atomic value (leaf or sequence)""" - return self.is_leaf() or self.is_sequence() - - -class LarkLeaf(Node, ABC): - """""" - - def __init__(self, value: Any, index: int = -1, parent: Optional[TreeNode] = None): - super().__init__(index, parent) - self._value = value - - @property - def value(self) -> Any: - return self._value - - def serialize(self, options=None) -> Any: - 
return self._value - - -class InternalNode(Node): - def __init__( - self, children: List[Node], index: int = -1, parent: Optional[Node] = None - ): - super().__init__(index, parent) - self._children = children or [] - - # Set parent and index for all children - for i, child in enumerate(self._children): - if child is not None: - child.set_parent(self) - child.set_index(i) - - @property - def children(self) -> List[Node]: - return self._children - - def add_child(self, child: Node): - """Add a child to this internal node""" - child.set_parent(self) - child.set_index(len(self._children)) - self._children.append(child) - - def remove_child(self, index: int) -> Optional[Node]: - """Remove child at given index""" - if 0 <= index < len(self._children): - child = self._children.pop(index) - if child: - child.set_parent(None) - # Update indices for remaining children - for i in range(index, len(self._children)): - if self._children[i]: - self._children[i].set_index(i) - return child - return None - - @abstractmethod - def rule_name(self) -> str: - """The name of the grammar rule this represents""" - pass From 210e3cd2c354670b7ee3e0fde217d1862d4d26ba Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:15:24 +0100 Subject: [PATCH 17/45] fix - dont add spaces add the end of the line (before newline rule); remove unused import --- hcl2/reconstructor.py | 4 ++-- hcl2/rules/abstract.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index e92f7040..1b5260ac 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -87,8 +87,8 @@ def _should_add_space_before( # Space after commas in tuples and function arguments... if self._last_token_name == tokens.COMMA.lark_name(): - # ... except for last comma - if token_type == tokens.RSQB.lark_name(): + # ... 
except before closing brackets or newlines + if token_type in (tokens.RSQB.lark_name(), "NL_OR_COMMENT"): return False return True diff --git a/hcl2/rules/abstract.py b/hcl2/rules/abstract.py index a494d901..316c777a 100644 --- a/hcl2/rules/abstract.py +++ b/hcl2/rules/abstract.py @@ -1,8 +1,7 @@ from abc import ABC, abstractmethod -from typing import Any, Union, List, Optional, Tuple, Callable +from typing import Any, Union, List, Optional, Callable from lark import Token, Tree -from lark.exceptions import VisitError from lark.tree import Meta from hcl2.utils import SerializationOptions, SerializationContext From b235ec9845b3caea2b6218f8f29608567c42a240 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:27:24 +0100 Subject: [PATCH 18/45] use unittest subTest to fix noise in test results ("The type of the None singleton"); fix whitespaces in `test/round_trip/hcl2_reconstructed/smoke.tf` --- test/round_trip/hcl2_reconstructed/smoke.tf | 8 +- test/round_trip/test_round_trip.py | 107 +++++++++----------- 2 files changed, 53 insertions(+), 62 deletions(-) diff --git a/test/round_trip/hcl2_reconstructed/smoke.tf b/test/round_trip/hcl2_reconstructed/smoke.tf index b5c54e96..8f17d6d6 100644 --- a/test/round_trip/hcl2_reconstructed/smoke.tf +++ b/test/round_trip/hcl2_reconstructed/smoke.tf @@ -32,7 +32,7 @@ block label1 label2 { k = a.b.5 l = a.*.b m = a[*][c].a.*.1 - + block b1 { a = 1 } @@ -52,12 +52,12 @@ block label1 label3 { block { route53_forwarding_rule_shares = { - for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : "${forwarding_rule_key}" => { aws_account_ids = [ - for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : + for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : 
module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] - + ] } ... if substr(bucket_name, 0, 1) == "l" } diff --git a/test/round_trip/test_round_trip.py b/test/round_trip/test_round_trip.py index b43340b6..93fcd111 100644 --- a/test/round_trip/test_round_trip.py +++ b/test/round_trip/test_round_trip.py @@ -26,12 +26,13 @@ ROUND_TRIP_DIR = Path(__file__).absolute().parent HCL2_ORIGINAL_DIR = ROUND_TRIP_DIR / "hcl2_original" +SPECIAL_DIR = ROUND_TRIP_DIR / "special" + _STEP_DIRS = { "hcl2_original": HCL2_ORIGINAL_DIR, "hcl2_reconstructed": ROUND_TRIP_DIR / "hcl2_reconstructed", "json_serialized": ROUND_TRIP_DIR / "json_serialized", "json_reserialized": ROUND_TRIP_DIR / "json_reserialized", - "json_operator_precedence": ROUND_TRIP_DIR / "json_operator_precedence", } _STEP_SUFFIXES = { @@ -39,7 +40,6 @@ "hcl2_reconstructed": ".tf", "json_serialized": ".json", "json_reserialized": ".json", - "json_operator_precedence": ".json", } @@ -48,7 +48,6 @@ class SuiteStep(Enum): RECONSTRUCTED = "hcl2_reconstructed" JSON_SERIALIZED = "json_serialized" JSON_RESERIALIZED = "json_reserialized" - JSON_OPERATOR_PRECEDENCE = "json_operator_precedence" def _get_suites() -> List[str]: @@ -109,20 +108,18 @@ class TestRoundTripSerialization(TestCase): def test_hcl_to_json(self): for suite in _get_suites(): - yield self.check_hcl_to_json, suite - - def check_hcl_to_json(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - json_path = _get_suite_file(suite, SuiteStep.JSON_SERIALIZED) + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_path = _get_suite_file(suite, SuiteStep.JSON_SERIALIZED) - actual = _parse_and_serialize(hcl_path.read_text()) - expected = json.loads(json_path.read_text()) + actual = _parse_and_serialize(hcl_path.read_text()) + expected = json.loads(json_path.read_text()) - self.assertEqual( - actual, - expected, - f"HCL → JSON serialization mismatch for {suite}", - ) + 
self.assertEqual( + actual, + expected, + f"HCL → JSON serialization mismatch for {suite}", + ) class TestRoundTripReserialization(TestCase): @@ -132,21 +129,19 @@ class TestRoundTripReserialization(TestCase): def test_json_reserialization(self): for suite in _get_suites(): - yield self.check_json_reserialization, suite + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) - def check_json_reserialization(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reserialize(serialized) - serialized = _parse_and_serialize(hcl_path.read_text()) - actual = _deserialize_and_reserialize(serialized) - - expected = json.loads(json_reserialized_path.read_text()) - self.assertEqual( - actual, - expected, - f"JSON reserialization mismatch for {suite}", - ) + expected = json.loads(json_reserialized_path.read_text()) + self.assertEqual( + actual, + expected, + f"JSON reserialization mismatch for {suite}", + ) class TestRoundTripReconstruction(TestCase): @@ -156,21 +151,19 @@ class TestRoundTripReconstruction(TestCase): def test_json_to_hcl(self): for suite in _get_suites(): - yield self.check_json_to_hcl, suite - - def check_json_to_hcl(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - hcl_reconstructed_path = _get_suite_file(suite, SuiteStep.RECONSTRUCTED) + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + hcl_reconstructed_path = _get_suite_file(suite, SuiteStep.RECONSTRUCTED) - serialized = _parse_and_serialize(hcl_path.read_text()) - actual = _deserialize_and_reconstruct(serialized) + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reconstruct(serialized) - expected = 
hcl_reconstructed_path.read_text() - self.assertMultiLineEqual( - actual, - expected, - f"HCL reconstruction mismatch for {suite}", - ) + expected = hcl_reconstructed_path.read_text() + self.assertMultiLineEqual( + actual, + expected, + f"HCL reconstruction mismatch for {suite}", + ) class TestRoundTripFull(TestCase): @@ -180,27 +173,25 @@ class TestRoundTripFull(TestCase): def test_full_round_trip(self): for suite in _get_suites(): - yield self.check_full_round_trip, suite - - def check_full_round_trip(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - original_hcl = hcl_path.read_text() + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + original_hcl = hcl_path.read_text() - # Forward: HCL → JSON - serialized = _parse_and_serialize(original_hcl) + # Forward: HCL → JSON + serialized = _parse_and_serialize(original_hcl) - # Reconstruct: JSON → HCL - reconstructed_hcl = _deserialize_and_reconstruct(serialized) + # Reconstruct: JSON → HCL + reconstructed_hcl = _deserialize_and_reconstruct(serialized) - # Re-parse: reconstructed HCL → JSON - reserialized = _parse_and_serialize(reconstructed_hcl) + # Reparse: reconstructed HCL → JSON + reserialized = _parse_and_serialize(reconstructed_hcl) - self.assertEqual( - reserialized, - serialized, - f"Full round-trip mismatch for {suite}: " - f"HCL → JSON → HCL → JSON did not produce identical JSON", - ) + self.assertEqual( + reserialized, + serialized, + f"Full round-trip mismatch for {suite}: " + f"HCL → JSON → HCL → JSON did not produce identical JSON", + ) class TestOperatorPrecedence(TestCase): From a3fe3267dc0361d3cf78ab5d8bc201c0e53d90ab Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:39:02 +0100 Subject: [PATCH 19/45] remove files for WIP features --- hcl2/editor.py | 77 -------------- hcl2/processor.py | 258 ---------------------------------------------- 2 files changed, 335 deletions(-) delete mode 100644 hcl2/editor.py delete mode 
100644 hcl2/processor.py diff --git a/hcl2/editor.py b/hcl2/editor.py deleted file mode 100644 index 9efce08f..00000000 --- a/hcl2/editor.py +++ /dev/null @@ -1,77 +0,0 @@ -import dataclasses -from copy import copy, deepcopy -from typing import List, Optional, Set, Tuple - -from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.base import BlockRule, StartRule - - -@dataclasses.dataclass -class TreePathElement: - - name: str - index: int = 0 - - -@dataclasses.dataclass -class TreePath: - - elements: List[TreePathElement] = dataclasses.field(default_factory=list) - - @classmethod - def build(cls, elements: List[Tuple[str, Optional[int]] | str]): - results = [] - for element in elements: - if isinstance(element, tuple): - if len(element) == 1: - result = TreePathElement(element[0], 0) - else: - result = TreePathElement(*element) - else: - result = TreePathElement(element, 0) - - results.append(result) - - return cls(results) - - def __iter__(self): - return self.elements.__iter__() - - def __len__(self): - return self.elements.__len__() - - -class Editor: - def __init__(self, rules_tree: LarkRule): - self.rules_tree = rules_tree - - @classmethod - def _find_one(cls, rules_tree: LarkRule, path_element: TreePathElement) -> LarkRule: - return cls._find_all(rules_tree, path_element.name)[path_element.index] - - @classmethod - def _find_all(cls, rules_tree: LarkRule, rule_name: str) -> List[LarkRule]: - children = [] - print("rule", rules_tree) - print("rule children", rules_tree.children) - for child in rules_tree.children: - if isinstance(child, LarkRule) and child.lark_name() == rule_name: - children.append(child) - - return children - - def find_by_path(self, path: TreePath, rule_name: str) -> List[LarkRule]: - path = deepcopy(path.elements) - - current_rule = self.rules_tree - while len(path) > 0: - current_path, *path = path - print(current_path, path) - current_rule = self._find_one(current_rule, current_path) - - return 
self._find_all(current_rule, rule_name) - - # def visit(self, path: TreePath) -> "Editor": - # - # while len(path) > 1: - # current = diff --git a/hcl2/processor.py b/hcl2/processor.py deleted file mode 100644 index b854aff5..00000000 --- a/hcl2/processor.py +++ /dev/null @@ -1,258 +0,0 @@ -from copy import copy, deepcopy -from typing import ( - List, - Optional, - Union, - Callable, - Any, - Tuple, - Generic, - TypeVar, - cast, - Generator, -) - -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement -from hcl2.rule_transformer.rules.base import BlockRule, AttributeRule -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule - -T = TypeVar("T", bound=LarkRule) - - -class RulesProcessor(Generic[T]): - """""" - - @classmethod - def _traverse( - cls, - node: T, - predicate: Callable[[T], bool], - current_depth: int = 0, - max_depth: Optional[int] = None, - ) -> List["RulesProcessor"]: - - results = [] - - if predicate(node): - results.append(cls(node)) - - if max_depth is not None and current_depth >= max_depth: - return results - - for child in node.children: - if child is None or not isinstance(child, LarkRule): - continue - - child_results = cls._traverse( - child, - predicate, - current_depth + 1, - max_depth, - ) - results.extend(child_results) - - return results - - def __init__(self, node: LarkRule): - self.node = node - - @property - def siblings(self): - if self.node.parent is None: - return None - return self.node.parent.children - - @property - def next_siblings(self): - if self.node.parent is None: - return None - return self.node.parent.children[self.node.index + 1 :] - - @property - def previous_siblings(self): - if self.node.parent is None: - return None - return self.node.parent.children[: self.node.index - 1] - - def walk(self) -> Generator[Tuple["RulesProcessor", List["RulesProcessor"]]]: - child_processors = [self.__class__(child) for child in self.node.children] - yield self, child_processors - for processor in 
child_processors: - if isinstance(processor.node, LarkRule): - for result in processor.walk(): - yield result - - def find_block( - self, - labels: List[str], - exact_match: bool = True, - max_depth: Optional[int] = None, - ) -> "RulesProcessor[BlockRule]": - return self.find_blocks(labels, exact_match, max_depth)[0] - - def find_blocks( - self, - labels: List[str], - exact_match: bool = True, - max_depth: Optional[int] = None, - ) -> List["RulesProcessor[BlockRule]"]: - """ - Find blocks by their labels. - - Args: - labels: List of label strings to match - exact_match: If True, all labels must match exactly. If False, labels can be a subset. - max_depth: Maximum depth to search - - Returns: - ... - """ - - def block_predicate(node: LarkRule) -> bool: - if not isinstance(node, BlockRule): - return False - - node_labels = [label.serialize() for label in node.labels] - - if exact_match: - return node_labels == labels - else: - # Check if labels is a prefix of node_labels - if len(labels) > len(node_labels): - return False - return node_labels[: len(labels)] == labels - - return cast( - List[RulesProcessor[BlockRule]], - self._traverse(self.node, block_predicate, max_depth=max_depth), - ) - - def attribute( - self, name: str, max_depth: Optional[int] = None - ) -> "RulesProcessor[AttributeRule]": - return self.find_attributes(name, max_depth)[0] - - def find_attributes( - self, name: str, max_depth: Optional[int] = None - ) -> List["RulesProcessor[AttributeRule]"]: - """ - Find attributes by their identifier name. 
- - Args: - name: Attribute name to search for - max_depth: Maximum depth to search - - Returns: - List of TreePath objects for matching attributes - """ - - def attribute_predicate(node: LarkRule) -> bool: - if not isinstance(node, AttributeRule): - return False - return node.identifier.serialize() == name - - return self._traverse(self.node, attribute_predicate, max_depth=max_depth) - - def rule(self, rule_name: str, max_depth: Optional[int] = None): - return self.find_rules(rule_name, max_depth)[0] - - def find_rules( - self, rule_name: str, max_depth: Optional[int] = None - ) -> List["RulesProcessor"]: - """ - Find all rules of a specific type. - - Args: - rule_name: Name of the rule type to find - max_depth: Maximum depth to search - - Returns: - List of TreePath objects for matching rules - """ - - def rule_predicate(node: LarkRule) -> bool: - return node.lark_name() == rule_name - - return self._traverse(self.node, rule_predicate, max_depth=max_depth) - - def find_by_predicate( - self, predicate: Callable[[LarkRule], bool], max_depth: Optional[int] = None - ) -> List["RulesProcessor"]: - """ - Find all rules matching a custom predicate. 
- - Args: - predicate: Function that returns True for nodes to collect - max_depth: Maximum depth to search - - Returns: - List of TreePath objects for matching rules - """ - return self._traverse(self.node, predicate, max_depth) - - # Convenience methods - def get_all_blocks(self, max_depth: Optional[int] = None) -> List: - """Get all blocks in the tree.""" - return self.find_rules("block", max_depth) - - def get_all_attributes( - self, max_depth: Optional[int] = None - ) -> List["RulesProcessor"]: - """Get all attributes in the tree.""" - return self.find_rules("attribute", max_depth) - - def previous(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: - """Get the next sibling node.""" - if self.node.parent is None: - return None - - for sibling in reversed(self.previous_siblings): - if sibling is not None and isinstance(sibling, LarkRule): - if skip_new_line and isinstance(sibling, NewLineOrCommentRule): - continue - return self.__class__(sibling) - - def next(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: - """Get the next sibling node.""" - if self.node.parent is None: - return None - - for sibling in self.next_siblings: - if sibling is not None and isinstance(sibling, LarkRule): - if skip_new_line and isinstance(sibling, NewLineOrCommentRule): - continue - return self.__class__(sibling) - - def append_child( - self, new_node: LarkRule, indentation: bool = True - ) -> "RulesProcessor": - children = self.node.children - if indentation: - if isinstance(children[-1], NewLineOrCommentRule): - children.pop() - children.append(NewLineOrCommentRule.from_string("\n ")) - - new_node = deepcopy(new_node) - new_node.set_parent(self.node) - new_node.set_index(len(children)) - children.append(new_node) - return self.__class__(new_node) - - def replace(self, new_node: LarkRule) -> "RulesProcessor": - new_node = deepcopy(new_node) - - self.node.parent.children.pop(self.node.index) - self.node.parent.children.insert(self.node.index, new_node) - 
new_node.set_parent(self.node.parent) - new_node.set_index(self.node.index) - return self.__class__(new_node) - - # def insert_before(self, new_node: LarkRule) -> bool: - # """Insert a new node before this one.""" - # if self.parent is None or self.parent_index < 0: - # return False - # - # try: - # self.parent.children.insert(self.parent_index, new_node) - # except (IndexError, AttributeError): - # return False From 4054fc9627d70028f56124fc22c2c112ef4752f9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 19:30:21 +0100 Subject: [PATCH 20/45] add new unit tests, exclude some files from coverage report --- .coveragerc | 3 + test/helpers/__init__.py | 3 - test/helpers/hcl2_helper.py | 21 -- test/unit/__init__.py | 0 test/unit/rules/__init__.py | 0 test/unit/rules/test_abstract.py | 178 ++++++++++ test/unit/rules/test_containers.py | 396 +++++++++++++++++++++ test/unit/rules/test_expressions.py | 489 ++++++++++++++++++++++++++ test/unit/rules/test_literal_rules.py | 95 +++++ test/unit/rules/test_strings.py | 247 +++++++++++++ test/unit/rules/test_tokens.py | 162 +++++++++ test/unit/rules/test_whitespace.py | 135 +++++++ test/unit/test_utils.py | 148 ++++++++ 13 files changed, 1853 insertions(+), 24 deletions(-) delete mode 100644 test/helpers/__init__.py delete mode 100644 test/helpers/hcl2_helper.py create mode 100644 test/unit/__init__.py create mode 100644 test/unit/rules/__init__.py create mode 100644 test/unit/rules/test_abstract.py create mode 100644 test/unit/rules/test_containers.py create mode 100644 test/unit/rules/test_expressions.py create mode 100644 test/unit/rules/test_literal_rules.py create mode 100644 test/unit/rules/test_strings.py create mode 100644 test/unit/rules/test_tokens.py create mode 100644 test/unit/rules/test_whitespace.py create mode 100644 test/unit/test_utils.py diff --git a/.coveragerc b/.coveragerc index 4facabdc..30e6dc8c 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,6 +3,9 @@ branch = true omit = 
hcl2/__main__.py hcl2/lark_parser.py + hcl2/version.py + hcl2/__init__.py + hcl2/rules/__init__.py [report] show_missing = true diff --git a/test/helpers/__init__.py b/test/helpers/__init__.py deleted file mode 100644 index ba33e308..00000000 --- a/test/helpers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Helper functions for tests -""" diff --git a/test/helpers/hcl2_helper.py b/test/helpers/hcl2_helper.py deleted file mode 100644 index c39ee7fb..00000000 --- a/test/helpers/hcl2_helper.py +++ /dev/null @@ -1,21 +0,0 @@ -# pylint:disable=C0114,C0115,C0116 - -from lark import Tree - -from hcl2.parser import parser -from hcl2.dict_transformer import DictTransformer - - -class Hcl2Helper: - @classmethod - def load(cls, syntax: str) -> Tree: - return parser().parse(syntax) - - @classmethod - def load_to_dict(cls, syntax) -> dict: - tree = cls.load(syntax) - return DictTransformer().transform(tree) - - @classmethod - def build_argument(cls, identifier: str, expression: str = '"expression"') -> str: - return f"{identifier} = {expression}" diff --git a/test/unit/__init__.py b/test/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/rules/__init__.py b/test/unit/rules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/rules/test_abstract.py b/test/unit/rules/test_abstract.py new file mode 100644 index 00000000..8803effc --- /dev/null +++ b/test/unit/rules/test_abstract.py @@ -0,0 +1,178 @@ +from unittest import TestCase + +from lark import Token, Tree +from lark.tree import Meta + +from hcl2.rules.abstract import LarkElement, LarkToken, LarkRule +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Concrete stubs for testing ABCs --- + + +class ConcreteToken(LarkToken): + @staticmethod + def lark_name() -> str: + return "TEST_TOKEN" + + @property + def serialize_conversion(self): + return str + + +class IntToken(LarkToken): + @staticmethod + def lark_name() -> str: + return 
"INT_TOKEN" + + @property + def serialize_conversion(self): + return int + + +class ConcreteRule(LarkRule): + @staticmethod + def lark_name() -> str: + return "test_rule" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "test" + + +# --- Tests --- + + +class TestLarkToken(TestCase): + def test_init_stores_value(self): + token = ConcreteToken("hello") + self.assertEqual(token.value, "hello") + + def test_value_property(self): + token = ConcreteToken(42) + self.assertEqual(token.value, 42) + + def test_set_value(self): + token = ConcreteToken("old") + token.set_value("new") + self.assertEqual(token.value, "new") + + def test_str(self): + token = ConcreteToken("hello") + self.assertEqual(str(token), "hello") + + def test_str_numeric(self): + token = ConcreteToken(42) + self.assertEqual(str(token), "42") + + def test_repr(self): + token = ConcreteToken("hello") + self.assertEqual(repr(token), "") + + def test_to_lark_returns_token(self): + token = ConcreteToken("val") + lark_token = token.to_lark() + self.assertIsInstance(lark_token, Token) + self.assertEqual(lark_token.type, "TEST_TOKEN") + self.assertEqual(lark_token, "val") + + def test_serialize_uses_conversion(self): + token = ConcreteToken("hello") + self.assertEqual(token.serialize(), "hello") + + def test_serialize_int_conversion(self): + token = IntToken("42") + result = token.serialize() + self.assertEqual(result, 42) + self.assertIsInstance(result, int) + + def test_lark_name(self): + self.assertEqual(ConcreteToken.lark_name(), "TEST_TOKEN") + + +class TestLarkRule(TestCase): + def test_init_sets_children(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + self.assertEqual(rule.children, [t1, t2]) + + def test_init_sets_parent_and_index(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + self.assertIs(t1._parent, rule) + self.assertIs(t2._parent, rule) + 
self.assertEqual(t1._index, 0) + self.assertEqual(t2._index, 1) + + def test_init_skips_none_children_for_parent_index(self): + t1 = ConcreteToken("a") + rule = ConcreteRule([None, t1, None]) + self.assertIs(t1._parent, rule) + self.assertEqual(t1._index, 1) + + def test_init_with_meta(self): + meta = Meta() + rule = ConcreteRule([], meta) + self.assertIs(rule._meta, meta) + + def test_init_without_meta(self): + rule = ConcreteRule([]) + self.assertIsNotNone(rule._meta) + + def test_parent_property(self): + child_rule = ConcreteRule([]) + parent_rule = ConcreteRule([child_rule]) + self.assertIs(child_rule.parent, parent_rule) + + def test_index_property(self): + child_rule = ConcreteRule([]) + ConcreteRule([child_rule]) + self.assertEqual(child_rule.index, 0) + + def test_children_property(self): + t = ConcreteToken("x") + rule = ConcreteRule([t]) + self.assertEqual(rule.children, [t]) + + def test_to_lark_builds_tree(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + tree = rule.to_lark() + self.assertIsInstance(tree, Tree) + self.assertEqual(tree.data, "test_rule") + self.assertEqual(len(tree.children), 2) + + def test_to_lark_skips_none_children(self): + t1 = ConcreteToken("a") + rule = ConcreteRule([None, t1, None]) + tree = rule.to_lark() + self.assertEqual(len(tree.children), 1) + self.assertEqual(tree.children[0], "a") + + def test_repr(self): + rule = ConcreteRule([]) + self.assertEqual(repr(rule), "") + + def test_nested_rules(self): + inner = ConcreteRule([ConcreteToken("x")]) + outer = ConcreteRule([inner]) + self.assertIs(inner.parent, outer) + tree = outer.to_lark() + self.assertEqual(tree.data, "test_rule") + self.assertEqual(len(tree.children), 1) + self.assertIsInstance(tree.children[0], Tree) + + +class TestLarkElement(TestCase): + def test_set_index(self): + token = ConcreteToken("x") + token.set_index(5) + self.assertEqual(token._index, 5) + + def test_set_parent(self): + token = ConcreteToken("x") + 
parent = ConcreteRule([]) + token.set_parent(parent) + self.assertIs(token._parent, parent) diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py new file mode 100644 index 00000000..b49b3f38 --- /dev/null +++ b/test/unit/rules/test_containers.py @@ -0,0 +1,396 @@ +from unittest import TestCase + +from hcl2.rules.containers import ( + TupleRule, + ObjectElemKeyRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, + ObjectElemRule, + ObjectRule, +) +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule, IntLitRule, FloatLitRule +from hcl2.rules.strings import StringRule, StringPartRule +from hcl2.rules.tokens import ( + LSQB, + RSQB, + LBRACE, + RBRACE, + LPAR, + RPAR, + DOT, + EQ, + COLON, + COMMA, + NAME, + DBLQUOTE, + STRING_CHARS, + IntLiteral, + FloatLiteral, +) +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.rules.tokens import NL_OR_COMMENT +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & Helpers --- + + +class StubExpression(ExpressionRule): + """Minimal ExpressionRule that serializes to a fixed value.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_nlc(text): + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +def _make_identifier(name): + return IdentifierRule([NAME(name)]) + + +def _make_string_rule(text): + part = StringPartRule([STRING_CHARS(text)]) + return StringRule([DBLQUOTE(), part, DBLQUOTE()]) + + +def _make_object_elem_key(identifier_name): + return ObjectElemKeyRule([_make_identifier(identifier_name)]) + + +def _make_object_elem(key_name, expr_value, sep=None): + key = _make_object_elem_key(key_name) + separator = sep or EQ() + return ObjectElemRule([key, separator, StubExpression(expr_value)]) + + +# --- TupleRule tests --- 
+ + +class TestTupleRule(TestCase): + def test_lark_name(self): + self.assertEqual(TupleRule.lark_name(), "tuple") + + def test_elements_empty_tuple(self): + rule = TupleRule([LSQB(), RSQB()]) + self.assertEqual(rule.elements, []) + + def test_elements_single(self): + expr = StubExpression(1) + rule = TupleRule([LSQB(), expr, RSQB()]) + self.assertEqual(rule.elements, [expr]) + + def test_elements_multiple(self): + e1 = StubExpression(1) + e2 = StubExpression(2) + e3 = StubExpression(3) + rule = TupleRule([LSQB(), e1, COMMA(), e2, COMMA(), e3, RSQB()]) + self.assertEqual(rule.elements, [e1, e2, e3]) + + def test_elements_skips_non_expressions(self): + e1 = StubExpression(1) + e2 = StubExpression(2) + nlc = _make_nlc("\n") + rule = TupleRule([LSQB(), nlc, e1, COMMA(), nlc, e2, RSQB()]) + self.assertEqual(len(rule.elements), 2) + + def test_serialize_default_returns_list(self): + rule = TupleRule( + [LSQB(), StubExpression(1), COMMA(), StubExpression(2), RSQB()] + ) + result = rule.serialize() + self.assertEqual(result, [1, 2]) + + def test_serialize_empty_returns_empty_list(self): + rule = TupleRule([LSQB(), RSQB()]) + self.assertEqual(rule.serialize(), []) + + def test_serialize_single_element(self): + rule = TupleRule([LSQB(), StubExpression(42), RSQB()]) + self.assertEqual(rule.serialize(), [42]) + + def test_serialize_wrap_tuples(self): + rule = TupleRule( + [LSQB(), StubExpression("a"), COMMA(), StubExpression("b"), RSQB()] + ) + opts = SerializationOptions(wrap_tuples=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${[a, b]}") + + def test_serialize_wrap_tuples_empty(self): + rule = TupleRule([LSQB(), RSQB()]) + opts = SerializationOptions(wrap_tuples=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${[]}") + + def test_serialize_inside_dollar_string(self): + rule = TupleRule([LSQB(), StubExpression("a"), RSQB()]) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + 
# Inside dollar string forces string representation + self.assertEqual(result, "[a]") + + def test_serialize_inside_dollar_string_no_extra_wrap(self): + rule = TupleRule( + [LSQB(), StubExpression("a"), COMMA(), StubExpression("b"), RSQB()] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "[a, b]") + + def test_serialize_wrap_tuples_inside_dollar_string(self): + rule = TupleRule([LSQB(), StubExpression("x"), RSQB()]) + opts = SerializationOptions(wrap_tuples=True) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + # Already inside $, so no extra wrapping + self.assertEqual(result, "[x]") + + +# --- ObjectElemKeyRule tests --- + + +class TestObjectElemKeyRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectElemKeyRule.lark_name(), "object_elem_key") + + def test_value_property_identifier(self): + ident = _make_identifier("foo") + rule = ObjectElemKeyRule([ident]) + self.assertIs(rule.value, ident) + + def test_serialize_identifier(self): + rule = ObjectElemKeyRule([_make_identifier("my_key")]) + self.assertEqual(rule.serialize(), "my_key") + + def test_serialize_int_lit(self): + rule = ObjectElemKeyRule([IntLitRule([IntLiteral("5")])]) + self.assertEqual(rule.serialize(), 5) + + def test_serialize_float_lit(self): + rule = ObjectElemKeyRule([FloatLitRule([FloatLiteral("3.14")])]) + self.assertAlmostEqual(rule.serialize(), 3.14) + + def test_serialize_string(self): + rule = ObjectElemKeyRule([_make_string_rule("k3")]) + self.assertEqual(rule.serialize(), '"k3"') + + +# --- ObjectElemKeyExpressionRule tests --- + + +class TestObjectElemKeyExpressionRule(TestCase): + def test_lark_name(self): + self.assertEqual( + ObjectElemKeyExpressionRule.lark_name(), "object_elem_key_expression" + ) + + def test_expression_property(self): + expr = StubExpression("5 + 5") + rule = ObjectElemKeyExpressionRule([LPAR(), expr, RPAR()]) 
+ self.assertIs(rule.expression, expr) + + def test_serialize(self): + rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + result = rule.serialize() + self.assertEqual(result, "${(5 + 5)}") + + def test_serialize_inside_dollar_string(self): + rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "(5 + 5)") + + +# --- ObjectElemKeyDotAccessor tests --- + + +class TestObjectElemKeyDotAccessor(TestCase): + def test_lark_name(self): + self.assertEqual( + ObjectElemKeyDotAccessor.lark_name(), "object_elem_key_dot_accessor" + ) + + def test_identifiers_property(self): + i1 = _make_identifier("k5") + i2 = _make_identifier("attr") + i3 = _make_identifier("sub") + rule = ObjectElemKeyDotAccessor([i1, DOT(), i2, DOT(), i3]) + idents = rule.identifiers + self.assertEqual(len(idents), 3) + self.assertIs(idents[0], i1) + self.assertIs(idents[1], i2) + self.assertIs(idents[2], i3) + + def test_identifiers_two_segments(self): + i1 = _make_identifier("a") + i2 = _make_identifier("b") + rule = ObjectElemKeyDotAccessor([i1, DOT(), i2]) + self.assertEqual(len(rule.identifiers), 2) + + def test_serialize(self): + rule = ObjectElemKeyDotAccessor( + [ + _make_identifier("k5"), + DOT(), + _make_identifier("attr"), + DOT(), + _make_identifier("sub"), + ] + ) + self.assertEqual(rule.serialize(), "k5.attr.sub") + + def test_serialize_two_segments(self): + rule = ObjectElemKeyDotAccessor( + [_make_identifier("a"), DOT(), _make_identifier("b")] + ) + self.assertEqual(rule.serialize(), "a.b") + + +# --- ObjectElemRule tests --- + + +class TestObjectElemRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectElemRule.lark_name(), "object_elem") + + def test_key_property(self): + key = _make_object_elem_key("foo") + rule = ObjectElemRule([key, EQ(), StubExpression("bar")]) + self.assertIs(rule.key, key) + + def 
test_expression_property(self): + expr = StubExpression("bar") + rule = ObjectElemRule([_make_object_elem_key("foo"), EQ(), expr]) + self.assertIs(rule.expression, expr) + + def test_serialize_with_eq(self): + rule = _make_object_elem("name", "value") + self.assertEqual(rule.serialize(), {"name": "value"}) + + def test_serialize_with_colon(self): + rule = ObjectElemRule([_make_object_elem_key("k"), COLON(), StubExpression(42)]) + self.assertEqual(rule.serialize(), {"k": 42}) + + def test_serialize_int_value(self): + rule = _make_object_elem("count", 5) + self.assertEqual(rule.serialize(), {"count": 5}) + + def test_serialize_string_key(self): + key = ObjectElemKeyRule([_make_string_rule("quoted")]) + rule = ObjectElemRule([key, EQ(), StubExpression("val")]) + self.assertEqual(rule.serialize(), {'"quoted"': "val"}) + + +# --- ObjectRule tests --- + + +class TestObjectRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectRule.lark_name(), "object") + + def test_elements_empty(self): + rule = ObjectRule([LBRACE(), RBRACE()]) + self.assertEqual(rule.elements, []) + + def test_elements_single(self): + elem = _make_object_elem("k", "v") + rule = ObjectRule([LBRACE(), elem, RBRACE()]) + self.assertEqual(rule.elements, [elem]) + + def test_elements_multiple(self): + e1 = _make_object_elem("a", 1) + e2 = _make_object_elem("b", 2) + rule = ObjectRule([LBRACE(), e1, e2, RBRACE()]) + self.assertEqual(rule.elements, [e1, e2]) + + def test_elements_skips_non_elem(self): + e1 = _make_object_elem("a", 1) + nlc = _make_nlc("\n") + rule = ObjectRule([LBRACE(), nlc, e1, nlc, RBRACE()]) + self.assertEqual(rule.elements, [e1]) + + def test_serialize_default_returns_dict(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k1", "v1"), + _make_object_elem("k2", "v2"), + RBRACE(), + ] + ) + result = rule.serialize() + self.assertEqual(result, {"k1": "v1", "k2": "v2"}) + + def test_serialize_empty_returns_empty_dict(self): + rule = ObjectRule([LBRACE(), 
RBRACE()]) + self.assertEqual(rule.serialize(), {}) + + def test_serialize_single_element(self): + rule = ObjectRule([LBRACE(), _make_object_elem("x", 42), RBRACE()]) + self.assertEqual(rule.serialize(), {"x": 42}) + + def test_serialize_wrap_objects(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k1", "v1"), + _make_object_elem("k2", "v2"), + RBRACE(), + ] + ) + opts = SerializationOptions(wrap_objects=True) + result = rule.serialize(options=opts) + # Result is "{k1 = v1, k2 = v2}" wrapped in ${}, giving ${{...}} + self.assertEqual(result, "${{k1 = v1, k2 = v2}}") + + def test_serialize_wrap_objects_empty(self): + rule = ObjectRule([LBRACE(), RBRACE()]) + opts = SerializationOptions(wrap_objects=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${{}}") + + def test_serialize_inside_dollar_string(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k", "v"), + RBRACE(), + ] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + # Inside dollar string forces string representation + self.assertEqual(result, "{k = v}") + + def test_serialize_inside_dollar_string_no_extra_wrap(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("a", 1), + _make_object_elem("b", 2), + RBRACE(), + ] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "{a = 1, b = 2}") + + def test_serialize_wrap_objects_inside_dollar_string(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k", "v"), + RBRACE(), + ] + ) + opts = SerializationOptions(wrap_objects=True) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + self.assertEqual(result, "{k = v}") diff --git a/test/unit/rules/test_expressions.py b/test/unit/rules/test_expressions.py new file mode 100644 index 00000000..16800ed0 --- /dev/null +++ b/test/unit/rules/test_expressions.py @@ -0,0 
+1,489 @@ +from unittest import TestCase + +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions import ( + ExpressionRule, + ExprTermRule, + ConditionalRule, + BinaryTermRule, + BinaryOpRule, + UnaryOpRule, +) +from hcl2.rules.literal_rules import BinaryOperatorRule, IdentifierRule +from hcl2.rules.tokens import ( + LPAR, + RPAR, + QMARK, + COLON, + BINARY_OP, + NAME, + StringToken, +) +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & helpers --- + + +class StubExpression(ExpressionRule): + """Minimal concrete ExpressionRule that serializes to a fixed string.""" + + def __init__(self, value, children=None): + self._stub_value = value + super().__init__(children or [], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +class NonExpressionRule(LarkRule): + """A rule that is NOT an ExpressionRule, for parent-chain tests.""" + + @staticmethod + def lark_name(): + return "non_expression" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "non_expr" + + +def _make_expr_term(value): + """Build ExprTermRule wrapping a StubExpression (no parens).""" + return ExprTermRule([StubExpression(value)]) + + +def _make_paren_expr_term(value): + """Build ExprTermRule wrapping a StubExpression in parentheses.""" + return ExprTermRule([LPAR(), StubExpression(value), RPAR()]) + + +def _make_binary_operator(op_str): + """Build BinaryOperatorRule for an operator string.""" + return BinaryOperatorRule([BINARY_OP(op_str)]) + + +def _make_binary_term(op_str, rhs_value): + """Build BinaryTermRule with given operator and RHS value.""" + return BinaryTermRule([_make_binary_operator(op_str), _make_expr_term(rhs_value)]) + + +MINUS_TOKEN = StringToken["MINUS"] +NOT_TOKEN = StringToken["NOT"] + + +# --- ExprTermRule tests --- + + +class TestExprTermRule(TestCase): + def test_lark_name(self): + 
self.assertEqual(ExprTermRule.lark_name(), "expr_term") + + def test_construction_without_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + self.assertFalse(rule.parentheses) + + def test_construction_without_parens_children_structure(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + # children: [None, None, stub, None, None] + self.assertEqual(len(rule.children), 5) + self.assertIsNone(rule.children[0]) + self.assertIsNone(rule.children[1]) + self.assertIs(rule.children[2], stub) + self.assertIsNone(rule.children[3]) + self.assertIsNone(rule.children[4]) + + def test_construction_with_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([LPAR(), stub, RPAR()]) + self.assertTrue(rule.parentheses) + + def test_construction_with_parens_children_structure(self): + stub = StubExpression("a") + lpar = LPAR() + rpar = RPAR() + rule = ExprTermRule([lpar, stub, rpar]) + # children: [LPAR, None, stub, None, RPAR] + self.assertEqual(len(rule.children), 5) + self.assertIs(rule.children[0], lpar) + self.assertIsNone(rule.children[1]) + self.assertIs(rule.children[2], stub) + self.assertIsNone(rule.children[3]) + self.assertIs(rule.children[4], rpar) + + def test_expression_property(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + self.assertIs(rule.expression, stub) + + def test_expression_property_with_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([LPAR(), stub, RPAR()]) + self.assertIs(rule.expression, stub) + + def test_serialize_no_parens_delegates_to_inner(self): + rule = _make_expr_term("hello") + self.assertEqual(rule.serialize(), "hello") + + def test_serialize_no_parens_passes_through_int(self): + stub = StubExpression(42) + rule = ExprTermRule([stub]) + self.assertEqual(rule.serialize(), 42) + + def test_serialize_with_parens_wraps_and_dollar(self): + rule = _make_paren_expr_term("a") + result = rule.serialize() + self.assertEqual(result, "${(a)}") + + def 
test_serialize_with_parens_inside_dollar_string(self): + rule = _make_paren_expr_term("a") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + # Inside dollar string: wraps in () but NOT in ${} + self.assertEqual(result, "(a)") + + def test_serialize_sets_inside_parentheses_context(self): + """When parenthesized, inner expression should see inside_parentheses=True.""" + seen_context = {} + + class ContextCapture(ExpressionRule): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ): + seen_context["inside_parentheses"] = context.inside_parentheses + return "x" + + rule = ExprTermRule([LPAR(), ContextCapture([]), RPAR()]) + rule.serialize() + self.assertTrue(seen_context["inside_parentheses"]) + + def test_serialize_no_parens_preserves_inside_parentheses(self): + """Without parens, inside_parentheses passes through from caller context.""" + seen_context = {} + + class ContextCapture(ExpressionRule): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ): + seen_context["inside_parentheses"] = context.inside_parentheses + return "x" + + rule = ExprTermRule([ContextCapture([])]) + rule.serialize(context=SerializationContext(inside_parentheses=False)) + self.assertFalse(seen_context["inside_parentheses"]) + + +# --- ConditionalRule tests --- + + +class TestConditionalRule(TestCase): + def _make_conditional(self, cond_val="cond", true_val="yes", false_val="no"): + return ConditionalRule( + [ + StubExpression(cond_val), + QMARK(), + StubExpression(true_val), + COLON(), + StubExpression(false_val), + ] + ) + + def test_lark_name(self): + self.assertEqual(ConditionalRule.lark_name(), "conditional") + + def test_construction_inserts_optional_slots(self): + rule = self._make_conditional() + # Should have 8 children after _insert_optionals at [2, 4, 6] + self.assertEqual(len(rule.children), 8) + + def test_condition_property(self): + cond = 
StubExpression("cond") + rule = ConditionalRule( + [cond, QMARK(), StubExpression("t"), COLON(), StubExpression("f")] + ) + self.assertIs(rule.condition, cond) + + def test_if_true_property(self): + true_expr = StubExpression("yes") + rule = ConditionalRule( + [ + StubExpression("c"), + QMARK(), + true_expr, + COLON(), + StubExpression("f"), + ] + ) + self.assertIs(rule.if_true, true_expr) + + def test_if_false_property(self): + false_expr = StubExpression("no") + rule = ConditionalRule( + [ + StubExpression("c"), + QMARK(), + StubExpression("t"), + COLON(), + false_expr, + ] + ) + self.assertIs(rule.if_false, false_expr) + + def test_serialize_format(self): + rule = self._make_conditional("a", "b", "c") + result = rule.serialize() + self.assertEqual(result, "${a ? b : c}") + + def test_serialize_wraps_in_dollar_string(self): + rule = self._make_conditional("x", "y", "z") + result = rule.serialize() + self.assertTrue(result.startswith("${")) + self.assertTrue(result.endswith("}")) + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_conditional("x", "y", "z") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "x ? y : z") + + def test_serialize_force_parens_no_parent(self): + """force_operation_parentheses with no parent → no wrapping.""" + rule = self._make_conditional("a", "b", "c") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + # No parent, so _wrap_into_parentheses returns unchanged + self.assertEqual(result, "${a ? 
b : c}") + + def test_serialize_force_parens_with_expression_parent(self): + """force_operation_parentheses with ExpressionRule parent → wraps.""" + rule = self._make_conditional("a", "b", "c") + # Nest inside another expression to set parent + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(a ? b : c)}") + + +# --- BinaryTermRule tests --- + + +class TestBinaryTermRule(TestCase): + def test_lark_name(self): + self.assertEqual(BinaryTermRule.lark_name(), "binary_term") + + def test_construction_inserts_optional(self): + rule = _make_binary_term("+", "b") + # [BinaryOperatorRule, None, ExprTermRule] + self.assertEqual(len(rule.children), 3) + self.assertIsNone(rule.children[1]) + + def test_binary_operator_property(self): + op = _make_binary_operator("+") + rhs = _make_expr_term("b") + rule = BinaryTermRule([op, rhs]) + self.assertIs(rule.binary_operator, op) + + def test_expr_term_property(self): + op = _make_binary_operator("+") + rhs = _make_expr_term("b") + rule = BinaryTermRule([op, rhs]) + self.assertIs(rule.expr_term, rhs) + + def test_serialize(self): + rule = _make_binary_term("+", "b") + result = rule.serialize() + self.assertEqual(result, "+ b") + + def test_serialize_equals_operator(self): + rule = _make_binary_term("==", "x") + self.assertEqual(rule.serialize(), "== x") + + def test_serialize_and_operator(self): + rule = _make_binary_term("&&", "y") + self.assertEqual(rule.serialize(), "&& y") + + +# --- BinaryOpRule tests --- + + +class TestBinaryOpRule(TestCase): + def _make_binary_op(self, lhs_val, op_str, rhs_val): + lhs = _make_expr_term(lhs_val) + bt = _make_binary_term(op_str, rhs_val) + return BinaryOpRule([lhs, bt, None]) + + def test_lark_name(self): + self.assertEqual(BinaryOpRule.lark_name(), "binary_op") + + def test_expr_term_property(self): + lhs = _make_expr_term("a") + bt = _make_binary_term("+", "b") + rule = 
BinaryOpRule([lhs, bt, None]) + self.assertIs(rule.expr_term, lhs) + + def test_binary_term_property(self): + lhs = _make_expr_term("a") + bt = _make_binary_term("+", "b") + rule = BinaryOpRule([lhs, bt, None]) + self.assertIs(rule.binary_term, bt) + + def test_serialize_addition(self): + rule = self._make_binary_op("a", "+", "b") + self.assertEqual(rule.serialize(), "${a + b}") + + def test_serialize_equality(self): + rule = self._make_binary_op("x", "==", "y") + self.assertEqual(rule.serialize(), "${x == y}") + + def test_serialize_and(self): + rule = self._make_binary_op("p", "&&", "q") + self.assertEqual(rule.serialize(), "${p && q}") + + def test_serialize_multiply(self): + rule = self._make_binary_op("a", "*", "b") + self.assertEqual(rule.serialize(), "${a * b}") + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_binary_op("a", "+", "b") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "a + b") + + def test_serialize_force_parens_no_parent(self): + """No parent → _wrap_into_parentheses returns unchanged.""" + rule = self._make_binary_op("a", "+", "b") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${a + b}") + + def test_serialize_force_parens_with_expression_parent(self): + """With ExpressionRule parent → wraps in parens.""" + rule = self._make_binary_op("a", "+", "b") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(a + b)}") + + def test_serialize_force_parens_inside_dollar_string_with_parent(self): + """Inside dollar string + parent → parens without extra ${}.""" + rule = self._make_binary_op("a", "+", "b") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + ctx = 
SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + self.assertEqual(result, "(a + b)") + + +# --- UnaryOpRule tests --- + + +class TestUnaryOpRule(TestCase): + def _make_unary(self, op_str, operand_val): + token_cls = MINUS_TOKEN if op_str == "-" else NOT_TOKEN + token = token_cls(op_str) + expr_term = _make_expr_term(operand_val) + return UnaryOpRule([token, expr_term]) + + def test_lark_name(self): + self.assertEqual(UnaryOpRule.lark_name(), "unary_op") + + def test_operator_property_minus(self): + rule = self._make_unary("-", "x") + self.assertEqual(rule.operator, "-") + + def test_operator_property_not(self): + rule = self._make_unary("!", "x") + self.assertEqual(rule.operator, "!") + + def test_expr_term_property(self): + expr_term = _make_expr_term("x") + token = MINUS_TOKEN("-") + rule = UnaryOpRule([token, expr_term]) + self.assertIs(rule.expr_term, expr_term) + + def test_serialize_minus(self): + rule = self._make_unary("-", "a") + self.assertEqual(rule.serialize(), "${-a}") + + def test_serialize_not(self): + rule = self._make_unary("!", "flag") + self.assertEqual(rule.serialize(), "${!flag}") + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_unary("-", "x") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "-x") + + def test_serialize_force_parens_no_parent(self): + rule = self._make_unary("-", "x") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${-x}") + + def test_serialize_force_parens_with_expression_parent(self): + rule = self._make_unary("-", "x") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(-x)}") + + +# --- ExpressionRule._wrap_into_parentheses tests --- + + +class 
TestWrapIntoParenthesesMethod(TestCase): + def test_returns_unchanged_when_inside_parentheses(self): + expr = StubExpression("test") + ctx = SerializationContext(inside_parentheses=True) + result = expr._wrap_into_parentheses("${x}", context=ctx) + self.assertEqual(result, "${x}") + + def test_returns_unchanged_when_no_parent(self): + expr = StubExpression("test") + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_returns_unchanged_when_parent_not_expression(self): + expr = StubExpression("test") + NonExpressionRule([expr]) + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_wraps_when_parent_is_expression(self): + expr = StubExpression("test") + StubExpression("outer", children=[expr]) + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${(x)}") + + def test_wraps_plain_string_when_parent_is_expression(self): + expr = StubExpression("test") + StubExpression("outer", children=[expr]) + result = expr._wrap_into_parentheses("a + b") + self.assertEqual(result, "(a + b)") + + def test_expr_term_parent_with_expression_grandparent(self): + """Parent is ExprTermRule, grandparent is ExpressionRule → wraps.""" + inner = StubExpression("test") + expr_term = ExprTermRule([inner]) + # inner is now at expr_term._children[2], parent=expr_term + StubExpression("grandparent", children=[expr_term]) + # expr_term.parent = grandparent (ExpressionRule) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${(x)}") + + def test_expr_term_parent_with_non_expression_grandparent(self): + """Parent is ExprTermRule, grandparent is NOT ExpressionRule → no wrap.""" + inner = StubExpression("test") + expr_term = ExprTermRule([inner]) + NonExpressionRule([expr_term]) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_expr_term_parent_with_no_grandparent(self): + """Parent is ExprTermRule with no parent → no wrap.""" + 
inner = StubExpression("test") + ExprTermRule([inner]) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") diff --git a/test/unit/rules/test_literal_rules.py b/test/unit/rules/test_literal_rules.py new file mode 100644 index 00000000..f6b8b94c --- /dev/null +++ b/test/unit/rules/test_literal_rules.py @@ -0,0 +1,95 @@ +from unittest import TestCase + +from hcl2.rules.literal_rules import ( + TokenRule, + KeywordRule, + IdentifierRule, + IntLitRule, + FloatLitRule, + BinaryOperatorRule, +) +from hcl2.rules.tokens import NAME, BINARY_OP, IntLiteral, FloatLiteral + + +class TestKeywordRule(TestCase): + def test_lark_name(self): + self.assertEqual(KeywordRule.lark_name(), "keyword") + + def test_token_property(self): + token = NAME("true") + rule = KeywordRule([token]) + self.assertIs(rule.token, token) + + def test_serialize(self): + rule = KeywordRule([NAME("true")]) + self.assertEqual(rule.serialize(), "true") + + +class TestIdentifierRule(TestCase): + def test_lark_name(self): + self.assertEqual(IdentifierRule.lark_name(), "identifier") + + def test_serialize(self): + rule = IdentifierRule([NAME("my_var")]) + self.assertEqual(rule.serialize(), "my_var") + + def test_token_property(self): + token = NAME("foo") + rule = IdentifierRule([token]) + self.assertIs(rule.token, token) + + +class TestIntLitRule(TestCase): + def test_lark_name(self): + self.assertEqual(IntLitRule.lark_name(), "int_lit") + + def test_serialize_returns_int(self): + rule = IntLitRule([IntLiteral("42")]) + result = rule.serialize() + self.assertEqual(result, 42) + self.assertIsInstance(result, int) + + +class TestFloatLitRule(TestCase): + def test_lark_name(self): + self.assertEqual(FloatLitRule.lark_name(), "float_lit") + + def test_serialize_returns_float(self): + rule = FloatLitRule([FloatLiteral("3.14")]) + result = rule.serialize() + self.assertAlmostEqual(result, 3.14) + self.assertIsInstance(result, float) + + +class TestBinaryOperatorRule(TestCase): + def 
test_lark_name(self): + self.assertEqual(BinaryOperatorRule.lark_name(), "binary_operator") + + def test_serialize_plus(self): + rule = BinaryOperatorRule([BINARY_OP("+")]) + self.assertEqual(rule.serialize(), "+") + + def test_serialize_equals(self): + rule = BinaryOperatorRule([BINARY_OP("==")]) + self.assertEqual(rule.serialize(), "==") + + def test_serialize_and(self): + rule = BinaryOperatorRule([BINARY_OP("&&")]) + self.assertEqual(rule.serialize(), "&&") + + def test_serialize_or(self): + rule = BinaryOperatorRule([BINARY_OP("||")]) + self.assertEqual(rule.serialize(), "||") + + def test_serialize_gt(self): + rule = BinaryOperatorRule([BINARY_OP(">")]) + self.assertEqual(rule.serialize(), ">") + + def test_serialize_multiply(self): + rule = BinaryOperatorRule([BINARY_OP("*")]) + self.assertEqual(rule.serialize(), "*") + + def test_token_property(self): + token = BINARY_OP("+") + rule = BinaryOperatorRule([token]) + self.assertIs(rule.token, token) diff --git a/test/unit/rules/test_strings.py b/test/unit/rules/test_strings.py new file mode 100644 index 00000000..67fec075 --- /dev/null +++ b/test/unit/rules/test_strings.py @@ -0,0 +1,247 @@ +from unittest import TestCase + +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.strings import ( + InterpolationRule, + StringPartRule, + StringRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, +) +from hcl2.rules.tokens import ( + INTERP_START, + RBRACE, + DBLQUOTE, + STRING_CHARS, + ESCAPED_INTERPOLATION, + HEREDOC_TEMPLATE, + HEREDOC_TRIM_TEMPLATE, +) +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs --- + + +class StubExpression(ExpressionRule): + """Minimal ExpressionRule that serializes to a fixed string.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +# --- Helpers --- + + +def 
_make_string_part_chars(text): + return StringPartRule([STRING_CHARS(text)]) + + +def _make_string_part_escaped(text): + return StringPartRule([ESCAPED_INTERPOLATION(text)]) + + +def _make_string_part_interpolation(expr_value): + interp = InterpolationRule([INTERP_START(), StubExpression(expr_value), RBRACE()]) + return StringPartRule([interp]) + + +def _make_string(parts): + """Build StringRule from a list of StringPartRule children.""" + return StringRule([DBLQUOTE(), *parts, DBLQUOTE()]) + + +# --- InterpolationRule tests --- + + +class TestInterpolationRule(TestCase): + def test_lark_name(self): + self.assertEqual(InterpolationRule.lark_name(), "interpolation") + + def test_expression_property(self): + expr = StubExpression("var.name") + rule = InterpolationRule([INTERP_START(), expr, RBRACE()]) + self.assertIs(rule.expression, expr) + + def test_serialize_wraps_in_dollar_string(self): + rule = InterpolationRule([INTERP_START(), StubExpression("var.name"), RBRACE()]) + self.assertEqual(rule.serialize(), "${var.name}") + + def test_serialize_idempotent_if_already_dollar(self): + rule = InterpolationRule([INTERP_START(), StubExpression("${x}"), RBRACE()]) + self.assertEqual(rule.serialize(), "${x}") + + def test_serialize_expression_result(self): + rule = InterpolationRule([INTERP_START(), StubExpression("a + b"), RBRACE()]) + self.assertEqual(rule.serialize(), "${a + b}") + + +# --- StringPartRule tests --- + + +class TestStringPartRule(TestCase): + def test_lark_name(self): + self.assertEqual(StringPartRule.lark_name(), "string_part") + + def test_content_property_string_chars(self): + token = STRING_CHARS("hello") + rule = StringPartRule([token]) + self.assertIs(rule.content, token) + + def test_serialize_string_chars(self): + rule = _make_string_part_chars("hello world") + self.assertEqual(rule.serialize(), "hello world") + + def test_serialize_escaped_interpolation(self): + rule = _make_string_part_escaped("$${aws:username}") + 
self.assertEqual(rule.serialize(), "$${aws:username}") + + def test_serialize_interpolation(self): + rule = _make_string_part_interpolation("var.name") + self.assertEqual(rule.serialize(), "${var.name}") + + def test_content_property_interpolation(self): + interp = InterpolationRule([INTERP_START(), StubExpression("x"), RBRACE()]) + rule = StringPartRule([interp]) + self.assertIs(rule.content, interp) + + +# --- StringRule tests --- + + +class TestStringRule(TestCase): + def test_lark_name(self): + self.assertEqual(StringRule.lark_name(), "string") + + def test_string_parts_property(self): + p1 = _make_string_part_chars("hello") + p2 = _make_string_part_chars(" world") + rule = _make_string([p1, p2]) + self.assertEqual(rule.string_parts, [p1, p2]) + + def test_string_parts_empty(self): + rule = _make_string([]) + self.assertEqual(rule.string_parts, []) + + def test_serialize_plain_string(self): + rule = _make_string([_make_string_part_chars("hello")]) + self.assertEqual(rule.serialize(), '"hello"') + + def test_serialize_empty_string(self): + rule = _make_string([]) + self.assertEqual(rule.serialize(), '""') + + def test_serialize_concatenated_parts(self): + rule = _make_string( + [ + _make_string_part_chars("prefix:"), + _make_string_part_interpolation("var.name"), + _make_string_part_chars("-suffix"), + ] + ) + self.assertEqual(rule.serialize(), '"prefix:${var.name}-suffix"') + + def test_serialize_escaped_and_interpolation(self): + rule = _make_string( + [ + _make_string_part_interpolation("bar"), + _make_string_part_escaped("$${baz:bat}"), + ] + ) + self.assertEqual(rule.serialize(), '"${bar}$${baz:bat}"') + + def test_serialize_only_interpolation(self): + rule = _make_string([_make_string_part_interpolation("x")]) + self.assertEqual(rule.serialize(), '"${x}"') + + +# --- HeredocTemplateRule tests --- + + +class TestHeredocTemplateRule(TestCase): + def test_lark_name(self): + self.assertEqual(HeredocTemplateRule.lark_name(), "heredoc_template") + + def 
test_heredoc_property(self): + token = HEREDOC_TEMPLATE("< str: + return "test_inline" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "test" + + +def _make_nlc(text): + """Helper: build NewLineOrCommentRule from a string.""" + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +# --- Tests --- + + +class TestNewLineOrCommentRule(TestCase): + def test_lark_name(self): + self.assertEqual(NewLineOrCommentRule.lark_name(), "new_line_or_comment") + + def test_serialize_newline(self): + rule = _make_nlc("\n") + self.assertEqual(rule.serialize(), "\n") + + def test_serialize_line_comment(self): + rule = _make_nlc("// this is a comment\n") + self.assertEqual(rule.serialize(), "// this is a comment\n") + + def test_serialize_hash_comment(self): + rule = _make_nlc("# hash comment\n") + self.assertEqual(rule.serialize(), "# hash comment\n") + + def test_to_list_bare_newline_returns_none(self): + rule = _make_nlc("\n") + self.assertIsNone(rule.to_list()) + + def test_to_list_line_comment(self): + rule = _make_nlc("// my comment\n") + result = rule.to_list() + self.assertEqual(result, ["my comment"]) + + def test_to_list_hash_comment(self): + rule = _make_nlc("# my comment\n") + result = rule.to_list() + self.assertEqual(result, ["my comment"]) + + def test_to_list_block_comment(self): + rule = _make_nlc("/* block comment */\n") + result = rule.to_list() + self.assertEqual(result, ["block comment"]) + + def test_to_list_multiple_comments(self): + rule = _make_nlc("// first\n// second\n") + result = rule.to_list() + self.assertIn("first", result) + self.assertIn("second", result) + + def test_token_property(self): + token = NL_OR_COMMENT("\n") + rule = NewLineOrCommentRule([token]) + self.assertIs(rule.token, token) + + +class TestInlineCommentMixIn(TestCase): + def test_insert_optionals_inserts_none_where_no_comment(self): + from hcl2.rules.tokens import NAME + + token = NAME("x") + children = [token, NAME("y")] + mixin = 
ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [1]) + # Should have inserted None at index 1, pushing NAME("y") to index 2 + self.assertIsNone(children[1]) + self.assertEqual(len(children), 3) + + def test_insert_optionals_leaves_comment_in_place(self): + comment = _make_nlc("// comment\n") + from hcl2.rules.tokens import NAME + + children = [NAME("x"), comment] + mixin = ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [1]) + # Should NOT insert None since index 1 is already a NewLineOrCommentRule + self.assertIs(children[1], comment) + self.assertEqual(len(children), 2) + + def test_insert_optionals_handles_index_error(self): + children = [_make_nlc("\n")] + mixin = ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [3]) + # Should insert None at index 3 + self.assertEqual(len(children), 2) + self.assertIsNone(children[1]) + + def test_inline_comments_collects_from_children(self): + comment = _make_nlc("// hello\n") + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x"), comment]) + result = rule.inline_comments() + self.assertEqual(result, ["hello"]) + + def test_inline_comments_skips_bare_newlines(self): + newline = _make_nlc("\n") + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x"), newline]) + result = rule.inline_comments() + self.assertEqual(result, []) + + def test_inline_comments_recursive(self): + comment = _make_nlc("// inner\n") + inner = ConcreteInlineComment([comment]) + outer = ConcreteInlineComment([inner]) + result = outer.inline_comments() + self.assertEqual(result, ["inner"]) + + def test_inline_comments_empty(self): + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x")]) + result = rule.inline_comments() + self.assertEqual(result, []) diff --git a/test/unit/test_utils.py b/test/unit/test_utils.py new file mode 100644 index 00000000..f5f94e8c --- 
/dev/null +++ b/test/unit/test_utils.py @@ -0,0 +1,148 @@ +from unittest import TestCase + +from hcl2.utils import ( + SerializationOptions, + SerializationContext, + is_dollar_string, + to_dollar_string, + unwrap_dollar_string, + wrap_into_parentheses, +) + + +class TestSerializationOptions(TestCase): + def test_default_values(self): + opts = SerializationOptions() + self.assertTrue(opts.with_comments) + self.assertFalse(opts.with_meta) + self.assertFalse(opts.wrap_objects) + self.assertFalse(opts.wrap_tuples) + self.assertTrue(opts.explicit_blocks) + self.assertTrue(opts.preserve_heredocs) + self.assertFalse(opts.force_operation_parentheses) + + def test_custom_values(self): + opts = SerializationOptions( + with_comments=False, + with_meta=True, + force_operation_parentheses=True, + ) + self.assertFalse(opts.with_comments) + self.assertTrue(opts.with_meta) + self.assertTrue(opts.force_operation_parentheses) + + +class TestSerializationContext(TestCase): + def test_default_values(self): + ctx = SerializationContext() + self.assertFalse(ctx.inside_dollar_string) + self.assertFalse(ctx.inside_parentheses) + + def test_replace_returns_new_instance(self): + ctx = SerializationContext() + new_ctx = ctx.replace(inside_dollar_string=True) + self.assertIsNot(ctx, new_ctx) + self.assertFalse(ctx.inside_dollar_string) + self.assertTrue(new_ctx.inside_dollar_string) + + def test_modify_mutates_and_restores(self): + ctx = SerializationContext() + self.assertFalse(ctx.inside_dollar_string) + + with ctx.modify(inside_dollar_string=True): + self.assertTrue(ctx.inside_dollar_string) + + self.assertFalse(ctx.inside_dollar_string) + + def test_modify_restores_on_exception(self): + ctx = SerializationContext() + + with self.assertRaises(ValueError): + with ctx.modify(inside_dollar_string=True, inside_parentheses=True): + self.assertTrue(ctx.inside_dollar_string) + self.assertTrue(ctx.inside_parentheses) + raise ValueError("test") + + self.assertFalse(ctx.inside_dollar_string) + 
self.assertFalse(ctx.inside_parentheses) + + def test_modify_multiple_fields(self): + ctx = SerializationContext() + with ctx.modify(inside_dollar_string=True, inside_parentheses=True): + self.assertTrue(ctx.inside_dollar_string) + self.assertTrue(ctx.inside_parentheses) + self.assertFalse(ctx.inside_dollar_string) + self.assertFalse(ctx.inside_parentheses) + + def test_copy_yields_independent_copy(self): + ctx = SerializationContext() + with ctx.copy(inside_dollar_string=True) as copied: + self.assertTrue(copied.inside_dollar_string) + self.assertFalse(ctx.inside_dollar_string) + self.assertIsNot(ctx, copied) + + +class TestIsDollarString(TestCase): + def test_valid_dollar_string(self): + self.assertTrue(is_dollar_string("${x}")) + + def test_nested_dollar_string(self): + self.assertTrue(is_dollar_string("${a + b}")) + + def test_plain_string(self): + self.assertFalse(is_dollar_string("foo")) + + def test_incomplete_prefix(self): + self.assertFalse(is_dollar_string("${")) + + def test_non_string_input(self): + self.assertFalse(is_dollar_string(42)) + self.assertFalse(is_dollar_string(None)) + + def test_empty_dollar_string(self): + self.assertTrue(is_dollar_string("${}")) + + def test_dollar_without_brace(self): + self.assertFalse(is_dollar_string("$x}")) + + def test_missing_closing_brace(self): + self.assertFalse(is_dollar_string("${x")) + + +class TestToDollarString(TestCase): + def test_wraps_plain_string(self): + self.assertEqual(to_dollar_string("x"), "${x}") + + def test_idempotent_on_dollar_string(self): + self.assertEqual(to_dollar_string("${x}"), "${x}") + + def test_wraps_empty(self): + self.assertEqual(to_dollar_string(""), "${}") + + def test_wraps_expression(self): + self.assertEqual(to_dollar_string("a + b"), "${a + b}") + + +class TestUnwrapDollarString(TestCase): + def test_strips_wrapping(self): + self.assertEqual(unwrap_dollar_string("${x}"), "x") + + def test_noop_on_plain_string(self): + self.assertEqual(unwrap_dollar_string("foo"), "foo") + + 
def test_strips_complex_expression(self): + self.assertEqual(unwrap_dollar_string("${a + b}"), "a + b") + + +class TestWrapIntoParentheses(TestCase): + def test_plain_string(self): + self.assertEqual(wrap_into_parentheses("x"), "(x)") + + def test_dollar_string(self): + self.assertEqual(wrap_into_parentheses("${x}"), "${(x)}") + + def test_expression_string(self): + self.assertEqual(wrap_into_parentheses("a + b"), "(a + b)") + + def test_dollar_expression(self): + self.assertEqual(wrap_into_parentheses("${a + b}"), "${(a + b)}") From 7662a5e039db786e9400531df0516154e02de666 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:06:33 +0100 Subject: [PATCH 21/45] rewrite api.py, update builder.py, add unit tests for them --- hcl2/__init__.py | 13 +- hcl2/__main__.py | 4 +- hcl2/api.py | 219 ++++++++++++++++++++++++++++------ hcl2/builder.py | 17 ++- hcl2/deserializer.py | 4 +- test/unit/test_api.py | 244 ++++++++++++++++++++++++++++++++++++++ test/unit/test_builder.py | 157 ++++++++++++++++++++++++ 7 files changed, 607 insertions(+), 51 deletions(-) create mode 100644 test/unit/test_api.py create mode 100644 test/unit/test_builder.py diff --git a/hcl2/__init__.py b/hcl2/__init__.py index 2d5dad09..d3a9ea7b 100644 --- a/hcl2/__init__.py +++ b/hcl2/__init__.py @@ -8,10 +8,21 @@ from .api import ( load, loads, + dump, + dumps, parse, parses, + parse_to_tree, + parses_to_tree, + from_dict, + from_json, + reconstruct, transform, - writes, + serialize, ) from .builder import Builder +from .deserializer import DeserializerOptions +from .formatter import FormatterOptions +from .rules.base import StartRule +from .utils import SerializationOptions diff --git a/hcl2/__main__.py b/hcl2/__main__.py index 17a021e1..f1a58938 100644 --- a/hcl2/__main__.py +++ b/hcl2/__main__.py @@ -19,6 +19,7 @@ from lark import UnexpectedCharacters, UnexpectedToken from . 
import load +from .utils import SerializationOptions from .version import __version__ @@ -58,7 +59,8 @@ def main(): else open(args.OUT_PATH, "w", encoding="utf-8") ) print(args.PATH, file=sys.stderr, flush=True) - json.dump(load(in_file, with_meta=args.with_meta), out_file) + options = SerializationOptions(with_meta=True) if args.with_meta else None + json.dump(load(in_file, serialization_options=options), out_file) if args.OUT_PATH is None: out_file.write("\n") out_file.close() diff --git a/hcl2/api.py b/hcl2/api.py index 7c384c53..0238f418 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -1,60 +1,205 @@ -"""The API that will be exposed to users of this package""" -from typing import TextIO +"""The API that will be exposed to users of this package. + +Follows the json module convention: load/loads for reading, dump/dumps for writing. +Also exposes intermediate pipeline stages for advanced usage. +""" + +import json as _json +from typing import TextIO, Optional from lark.tree import Tree -from hcl2.parser import parser + +from hcl2.deserializer import BaseDeserializer, DeserializerOptions +from hcl2.formatter import BaseFormatter, FormatterOptions +from hcl2.parser import parser as _get_parser from hcl2.reconstructor import HCLReconstructor +from hcl2.rules.base import StartRule from hcl2.transformer import RuleTransformer +from hcl2.utils import SerializationOptions + + +# --------------------------------------------------------------------------- +# Primary API: load / loads / dump / dumps +# --------------------------------------------------------------------------- + + +def load( + file: TextIO, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Load a HCL2 file and return a Python dict. + + :param file: File with HCL2 content. + :param serialization_options: Options controlling serialization behavior. 
+ """ + return loads(file.read(), serialization_options=serialization_options) + + +def loads( + text: str, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Load HCL2 from a string and return a Python dict. + :param text: HCL2 text. + :param serialization_options: Options controlling serialization behavior. + """ + tree = parses(text) + return serialize(tree, serialization_options=serialization_options) + + +def dump( + data: dict, + file: TextIO, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, +) -> None: + """Write a Python dict as HCL2 to a file. -def load(file: TextIO, with_meta=False) -> dict: - """Load a HCL2 file. - :param file: File with hcl2 to be loaded as a dict. - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. + :param data: Python dict (as produced by :func:`load`). + :param file: Writable text file. + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. """ - return loads(file.read(), with_meta=with_meta) + file.write(dumps(data, deserializer_options=deserializer_options, formatter_options=formatter_options)) -def loads(text: str, with_meta=False) -> dict: - """Load HCL2 from a string. - :param text: Text with hcl2 to be loaded as a dict. - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. +def dumps( + data: dict, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, +) -> str: + """Convert a Python dict to an HCL2 string. + + :param data: Python dict (as produced by :func:`load`). + :param deserializer_options: Options controlling deserialization behavior. 
+ :param formatter_options: Options controlling formatting behavior. """ - # append new line as a workaround for https://github.com/lark-parser/lark/issues/237 + tree = from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options) + return reconstruct(tree) + + +# --------------------------------------------------------------------------- +# Parsing: HCL text -> LarkElement tree or raw Lark tree +# --------------------------------------------------------------------------- + + +def parse(file: TextIO, *, discard_comments: bool = False) -> StartRule: + """Parse a HCL2 file into a LarkElement tree. + + :param file: File with HCL2 content. + :param discard_comments: If True, discard comments during transformation. + """ + return parses(file.read(), discard_comments=discard_comments) + + +def parses(text: str, *, discard_comments: bool = False) -> StartRule: + """Parse a HCL2 string into a LarkElement tree. + + :param text: HCL2 text. + :param discard_comments: If True, discard comments during transformation. + """ + lark_tree = parses_to_tree(text) + return transform(lark_tree, discard_comments=discard_comments) + + +def parse_to_tree(file: TextIO) -> Tree: + """Parse a HCL2 file into a raw Lark parse tree. + + :param file: File with HCL2 content. + """ + return parses_to_tree(file.read()) + + +def parses_to_tree(text: str) -> Tree: + """Parse a HCL2 string into a raw Lark parse tree. + + :param text: HCL2 text. + """ + # Append newline as workaround for https://github.com/lark-parser/lark/issues/237 # Lark doesn't support EOF token so our grammar can't look for "new line or end of file" - # This means that all blocks must end in a new line even if the file ends - # Append a new line as a temporary fix - tree = parser().parse(text + "\n") - return RuleTransformer().transform(tree) + return _get_parser().parse(text + "\n") -def parse(file: TextIO) -> Tree: - """Load HCL2 syntax tree from a file. 
- :param file: File with hcl2 to be loaded as a dict. +# --------------------------------------------------------------------------- +# Intermediate pipeline stages +# --------------------------------------------------------------------------- + + +def from_dict( + data: dict, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, + format: bool = True, +) -> StartRule: + """Convert a Python dict into a LarkElement tree. + + :param data: Python dict (as produced by :func:`load`). + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. + :param format: If True (default), apply formatting to the tree. + """ + deserializer = BaseDeserializer(deserializer_options) + tree = deserializer.load_python(data) + if format: + formatter = BaseFormatter(formatter_options) + formatter.format_tree(tree) + return tree + + +def from_json( + text: str, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, + format: bool = True, +) -> StartRule: + """Convert a JSON string into a LarkElement tree. + + :param text: JSON string. + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. + :param format: If True (default), apply formatting to the tree. """ - return parses(file.read()) + data = _json.loads(text) + return from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options, format=format) + +def reconstruct(tree) -> str: + """Convert a LarkElement tree (or raw Lark tree) to an HCL2 string. -def parses(text: str) -> Tree: - """Load HCL2 syntax tree from a string. - :param text: Text with hcl2 to be loaded as a dict. + :param tree: A :class:`StartRule` (LarkElement tree) or :class:`lark.Tree`. 
""" - return parser().parse(text) + reconstructor = HCLReconstructor() + if isinstance(tree, StartRule): + tree = tree.to_lark() + return reconstructor.reconstruct(tree) -def transform(ast: Tree, with_meta=False) -> dict: - """Convert an HCL2 AST to a dictionary. - :param ast: HCL2 syntax tree, output from `parse` or `parses` - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. +def transform(lark_tree: Tree, *, discard_comments: bool = False) -> StartRule: + """Transform a raw Lark parse tree into a LarkElement tree. + + :param lark_tree: Raw Lark tree from :func:`parse_to_tree` or :func:`parses_to_tree`. + :param discard_comments: If True, discard comments during transformation. """ - return RuleTransformer().transform(ast) + return RuleTransformer(discard_new_line_or_comments=discard_comments).transform(lark_tree) + +def serialize( + tree: StartRule, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Serialize a LarkElement tree to a Python dict. -def writes(ast: Tree) -> str: - """Convert an HCL2 syntax tree to a string. - :param ast: HCL2 syntax tree, output from `parse` or `parses` + :param tree: A :class:`StartRule` (LarkElement tree). + :param serialization_options: Options controlling serialization behavior. """ - return HCLReconstructor().reconstruct(ast) + if serialization_options is not None: + return tree.serialize(options=serialization_options) + return tree.serialize() diff --git a/hcl2/builder.py b/hcl2/builder.py index b5b149da..5ef0c416 100644 --- a/hcl2/builder.py +++ b/hcl2/builder.py @@ -3,18 +3,16 @@ from collections import defaultdict -from hcl2.const import START_LINE_KEY, END_LINE_KEY +from hcl2.const import IS_BLOCK class Builder: """ The `hcl2.Builder` class produces a dictionary that should be identical to the - output of `hcl2.load(example_file, with_meta=True)`. The `with_meta` keyword - argument is important here.
HCL "blocks" in the Python dictionary are - identified by the presence of `__start_line__` and `__end_line__` metadata - within them. The `Builder` class handles adding that metadata. If that metadata - is missing, the `hcl2.reconstructor.HCLReverseTransformer` class fails to - identify what is a block and what is just an attribute with an object value. + output of `hcl2.load(example_file)`. HCL "blocks" in the Python dictionary are + identified by the presence of `__is_block__: True` markers within them. + The `Builder` class handles adding that marker. If that marker is missing, + the deserializer fails to distinguish blocks from regular object attributes. """ def __init__(self, attributes: Optional[dict] = None): @@ -49,8 +47,7 @@ def build(self): body.update( { - START_LINE_KEY: -1, - END_LINE_KEY: -1, + IS_BLOCK: True, **self.attributes, } ) @@ -79,7 +76,7 @@ def _add_nested_blocks( """Add nested blocks defined within another `Builder` instance to the `block` dictionary""" nested_block = nested_blocks_builder.build() for key, value in nested_block.items(): - if key not in (START_LINE_KEY, END_LINE_KEY): + if key != IS_BLOCK: if key not in block.keys(): block[key] = [] block[key].extend(value) diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index d6b4d4c2..0ca91b48 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -6,7 +6,7 @@ from regex import regex -from hcl2 import parses +from hcl2.parser import parser as _get_parser from hcl2.const import IS_BLOCK from hcl2.rules.abstract import LarkElement, LarkRule from hcl2.rules.base import ( @@ -217,7 +217,7 @@ def _deserialize_expression(self, value: str) -> ExprTermRule: # create HCL2 snippet value = f"temp = {value}" # parse the above - parsed_tree = parses(value) + parsed_tree = _get_parser().parse(value) # transform parsed tree into LarkElement tree rules_tree = self._transformer.transform(parsed_tree) # extract expression from the tree diff --git a/test/unit/test_api.py 
b/test/unit/test_api.py new file mode 100644 index 00000000..a87d9e32 --- /dev/null +++ b/test/unit/test_api.py @@ -0,0 +1,244 @@ +from io import StringIO +from unittest import TestCase + +from hcl2.api import ( + load, + loads, + dump, + dumps, + parse, + parses, + parse_to_tree, + parses_to_tree, + from_dict, + from_json, + reconstruct, + transform, + serialize, +) +from hcl2.rules.base import StartRule +from hcl2.utils import SerializationOptions +from hcl2.deserializer import DeserializerOptions +from hcl2.formatter import FormatterOptions +from lark.tree import Tree + + +SIMPLE_HCL = 'x = 5\n' +SIMPLE_DICT = {"x": 5} + +BLOCK_HCL = 'resource "aws_instance" "example" {\n ami = "abc-123"\n}\n' + + +class TestLoads(TestCase): + + def test_simple_attribute(self): + result = loads(SIMPLE_HCL) + self.assertEqual(result["x"], 5) + + def test_returns_dict(self): + result = loads(SIMPLE_HCL) + self.assertIsInstance(result, dict) + + def test_with_serialization_options(self): + result = loads(SIMPLE_HCL, serialization_options=SerializationOptions(with_comments=False)) + self.assertIsInstance(result, dict) + self.assertEqual(result["x"], 5) + + def test_with_meta_option(self): + result = loads(SIMPLE_HCL, serialization_options=SerializationOptions(with_meta=True)) + self.assertIn("x", result) + + def test_block_parsing(self): + result = loads(BLOCK_HCL) + self.assertIn("resource", result) + + +class TestLoad(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = load(f) + self.assertEqual(result["x"], 5) + + def test_with_serialization_options(self): + f = StringIO(SIMPLE_HCL) + result = load(f, serialization_options=SerializationOptions(with_comments=False)) + self.assertEqual(result["x"], 5) + + +class TestDumps(TestCase): + + def test_simple_attribute(self): + result = dumps(SIMPLE_DICT) + self.assertIsInstance(result, str) + self.assertIn("x", result) + self.assertIn("5", result) + + def test_dumps_contains_key_and_value(self): + result = 
dumps(SIMPLE_DICT) + self.assertIn("x", result) + self.assertIn("5", result) + + def test_with_deserializer_options(self): + result = dumps(SIMPLE_DICT, deserializer_options=DeserializerOptions()) + self.assertIsInstance(result, str) + + def test_with_formatter_options(self): + result = dumps(SIMPLE_DICT, formatter_options=FormatterOptions()) + self.assertIsInstance(result, str) + + +class TestDump(TestCase): + + def test_writes_to_file(self): + f = StringIO() + dump(SIMPLE_DICT, f) + output = f.getvalue() + self.assertIn("x", output) + self.assertIn("5", output) + + +class TestParsesToTree(TestCase): + + def test_returns_lark_tree(self): + result = parses_to_tree(SIMPLE_HCL) + self.assertIsInstance(result, Tree) + + def test_tree_has_start_rule(self): + result = parses_to_tree(SIMPLE_HCL) + self.assertEqual(result.data, "start") + + +class TestParseToTree(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = parse_to_tree(f) + self.assertIsInstance(result, Tree) + + +class TestParses(TestCase): + + def test_returns_start_rule(self): + result = parses(SIMPLE_HCL) + self.assertIsInstance(result, StartRule) + + def test_discard_comments_false(self): + hcl = '# comment\nx = 5\n' + result = parses(hcl, discard_comments=False) + serialized = serialize(result) + self.assertIn("__comments__", serialized) + + def test_discard_comments_true(self): + hcl = '# comment\nx = 5\n' + result = parses(hcl, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestParse(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = parse(f) + self.assertIsInstance(result, StartRule) + + def test_discard_comments(self): + f = StringIO('# comment\nx = 5\n') + result = parse(f, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestTransform(TestCase): + + def test_transforms_lark_tree(self): + lark_tree = 
parses_to_tree(SIMPLE_HCL) + result = transform(lark_tree) + self.assertIsInstance(result, StartRule) + + def test_discard_comments(self): + lark_tree = parses_to_tree('# comment\nx = 5\n') + result = transform(lark_tree, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestSerialize(TestCase): + + def test_returns_dict(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree) + self.assertIsInstance(result, dict) + self.assertEqual(result["x"], 5) + + def test_with_options(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree, serialization_options=SerializationOptions(with_comments=False)) + self.assertIsInstance(result, dict) + + def test_none_options_uses_defaults(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree, serialization_options=None) + self.assertEqual(result["x"], 5) + + +class TestFromDict(TestCase): + + def test_returns_start_rule(self): + result = from_dict(SIMPLE_DICT) + self.assertIsInstance(result, StartRule) + + def test_roundtrip(self): + tree = from_dict(SIMPLE_DICT) + result = serialize(tree) + self.assertEqual(result["x"], 5) + + def test_without_formatting(self): + result = from_dict(SIMPLE_DICT, format=False) + self.assertIsInstance(result, StartRule) + + def test_with_deserializer_options(self): + result = from_dict(SIMPLE_DICT, deserializer_options=DeserializerOptions()) + self.assertIsInstance(result, StartRule) + + def test_with_formatter_options(self): + result = from_dict(SIMPLE_DICT, formatter_options=FormatterOptions()) + self.assertIsInstance(result, StartRule) + + +class TestFromJson(TestCase): + + def test_returns_start_rule(self): + result = from_json('{"x": 5}') + self.assertIsInstance(result, StartRule) + + def test_roundtrip(self): + tree = from_json('{"x": 5}') + result = serialize(tree) + self.assertEqual(result["x"], 5) + + def test_without_formatting(self): + result = from_json('{"x": 5}', format=False) + self.assertIsInstance(result, 
StartRule) + + +class TestReconstruct(TestCase): + + def test_from_start_rule(self): + tree = parses(SIMPLE_HCL) + result = reconstruct(tree) + self.assertIsInstance(result, str) + self.assertIn("x", result) + + def test_from_lark_tree(self): + lark_tree = parses_to_tree(SIMPLE_HCL) + result = reconstruct(lark_tree) + self.assertIsInstance(result, str) + self.assertIn("x", result) + + def test_roundtrip(self): + tree = parses(SIMPLE_HCL) + hcl_text = reconstruct(tree) + reparsed = loads(hcl_text) + self.assertEqual(reparsed["x"], 5) diff --git a/test/unit/test_builder.py b/test/unit/test_builder.py new file mode 100644 index 00000000..5d411c64 --- /dev/null +++ b/test/unit/test_builder.py @@ -0,0 +1,157 @@ +from unittest import TestCase + +from hcl2.builder import Builder +from hcl2.const import IS_BLOCK + + +class TestBuilderAttributes(TestCase): + + def test_empty_builder(self): + b = Builder() + result = b.build() + self.assertIn(IS_BLOCK, result) + self.assertTrue(result[IS_BLOCK]) + + def test_with_attributes(self): + b = Builder({"key": "value", "count": 3}) + result = b.build() + self.assertEqual(result["key"], "value") + self.assertEqual(result["count"], 3) + + def test_is_block_marker_present(self): + b = Builder({"x": 1}) + result = b.build() + self.assertTrue(result[IS_BLOCK]) + + +class TestBuilderBlock(TestCase): + + def test_simple_block(self): + b = Builder() + b.block("resource") + result = b.build() + self.assertIn("resource", result) + self.assertEqual(len(result["resource"]), 1) + + def test_block_with_labels(self): + b = Builder() + b.block("resource", labels=["aws_instance", "example"]) + result = b.build() + block_entry = result["resource"][0] + self.assertIn("aws_instance", block_entry) + inner = block_entry["aws_instance"] + self.assertIn("example", inner) + + def test_block_with_attributes(self): + b = Builder() + b.block("resource", labels=["type"], ami="abc-123") + result = b.build() + block = result["resource"][0]["type"] + 
self.assertEqual(block["ami"], "abc-123") + + def test_multiple_blocks_same_type(self): + b = Builder() + b.block("resource", labels=["type_a"]) + b.block("resource", labels=["type_b"]) + result = b.build() + self.assertEqual(len(result["resource"]), 2) + + def test_multiple_block_types(self): + b = Builder() + b.block("resource") + b.block("data") + result = b.build() + self.assertIn("resource", result) + self.assertIn("data", result) + + def test_block_returns_builder(self): + b = Builder() + child = b.block("resource") + self.assertIsInstance(child, Builder) + + def test_block_child_attributes(self): + b = Builder() + child = b.block("resource", labels=["type"]) + child.attributes["nested_key"] = "nested_val" + # Rebuild to pick up the changes + result = b.build() + block = result["resource"][0]["type"] + self.assertEqual(block["nested_key"], "nested_val") + + def test_self_reference_raises(self): + b = Builder() + with self.assertRaises(ValueError): + b.block("resource", __nested_builder__=b) + + +class TestBuilderNestedBlocks(TestCase): + + def test_nested_builder(self): + b = Builder() + inner = Builder() + inner.block("provisioner", labels=["local-exec"], command="echo hello") + b.block("resource", labels=["type"], __nested_builder__=inner) + result = b.build() + block = result["resource"][0]["type"] + self.assertIn("provisioner", block) + + def test_nested_blocks_merged(self): + b = Builder() + inner = Builder() + inner.block("sub_block", x=1) + inner.block("sub_block", x=2) + b.block("resource", __nested_builder__=inner) + result = b.build() + block = result["resource"][0] + self.assertEqual(len(block["sub_block"]), 2) + + +class TestBuilderBlockMarker(TestCase): + + def test_block_marker_is_is_block(self): + """Verify IS_BLOCK marker is used (not __start_line__/__end_line__).""" + b = Builder({"x": 1}) + result = b.build() + self.assertIn(IS_BLOCK, result) + self.assertTrue(result[IS_BLOCK]) + self.assertNotIn("__start_line__", result) + 
self.assertNotIn("__end_line__", result) + + def test_nested_blocks_skip_is_block_key(self): + """_add_nested_blocks should skip IS_BLOCK when merging.""" + b = Builder() + inner = Builder() + inner.block("sub", val=1) + b.block("parent", __nested_builder__=inner) + result = b.build() + parent_block = result["parent"][0] + # sub blocks should be present, but IS_BLOCK from inner should not leak as a list + self.assertIn("sub", parent_block) + # IS_BLOCK should be a bool marker, not a list + self.assertTrue(parent_block[IS_BLOCK]) + + +class TestBuilderIntegration(TestCase): + + def test_full_document(self): + doc = Builder() + doc.block( + "resource", + labels=["aws_instance", "web"], + ami="ami-12345", + instance_type="t2.micro", + ) + doc.block( + "resource", + labels=["aws_s3_bucket", "data"], + bucket="my-bucket", + ) + result = doc.build() + self.assertEqual(len(result["resource"]), 2) + + web = result["resource"][0]["aws_instance"]["web"] + self.assertEqual(web["ami"], "ami-12345") + self.assertEqual(web["instance_type"], "t2.micro") + + data = result["resource"][1]["aws_s3_bucket"]["data"] + self.assertEqual(data["bucket"], "my-bucket") From c05273d26e1c751266f1c924a9a96f12ac5fcdc9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:06:58 +0100 Subject: [PATCH 22/45] reorganize "round-trip" tests into integration tests --- test/{round_trip => integration}/__init__.py | 0 .../hcl2_original/operators.tf} | 0 .../hcl2_original/smoke.tf | 0 .../hcl2_reconstructed/operators.tf} | 0 .../hcl2_reconstructed/smoke.tf | 0 .../json_reserialized/operators.json} | 0 .../json_reserialized/smoke.json | 0 .../json_serialized/operators.json} | 0 .../json_serialized/smoke.json | 0 .../specialized/builder_basic.json | 63 +++++++++++++++ test/integration/specialized/builder_basic.tf | 38 +++++++++ .../specialized/builder_basic_reparsed.json | 64 +++++++++++++++ .../builder_basic_reserialized.json | 62 +++++++++++++++ .../specialized}/operator_precedence.json | 0 
.../specialized/operator_precedence.tf | 15 ++++ .../test_round_trip.py | 48 +++--------- test/integration/test_specialized.py | 77 +++++++++++++++++++ 17 files changed, 331 insertions(+), 36 deletions(-) rename test/{round_trip => integration}/__init__.py (100%) rename test/{round_trip/hcl2_original/operator_precedence.tf => integration/hcl2_original/operators.tf} (100%) rename test/{round_trip => integration}/hcl2_original/smoke.tf (100%) rename test/{round_trip/hcl2_reconstructed/operator_precedence.tf => integration/hcl2_reconstructed/operators.tf} (100%) rename test/{round_trip => integration}/hcl2_reconstructed/smoke.tf (100%) rename test/{round_trip/json_reserialized/operator_precedence.json => integration/json_reserialized/operators.json} (100%) rename test/{round_trip => integration}/json_reserialized/smoke.json (100%) rename test/{round_trip/json_serialized/operator_precedence.json => integration/json_serialized/operators.json} (100%) rename test/{round_trip => integration}/json_serialized/smoke.json (100%) create mode 100644 test/integration/specialized/builder_basic.json create mode 100644 test/integration/specialized/builder_basic.tf create mode 100644 test/integration/specialized/builder_basic_reparsed.json create mode 100644 test/integration/specialized/builder_basic_reserialized.json rename test/{round_trip/special => integration/specialized}/operator_precedence.json (100%) create mode 100644 test/integration/specialized/operator_precedence.tf rename test/{round_trip => integration}/test_round_trip.py (78%) create mode 100644 test/integration/test_specialized.py diff --git a/test/round_trip/__init__.py b/test/integration/__init__.py similarity index 100% rename from test/round_trip/__init__.py rename to test/integration/__init__.py diff --git a/test/round_trip/hcl2_original/operator_precedence.tf b/test/integration/hcl2_original/operators.tf similarity index 100% rename from test/round_trip/hcl2_original/operator_precedence.tf rename to 
test/integration/hcl2_original/operators.tf diff --git a/test/round_trip/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf similarity index 100% rename from test/round_trip/hcl2_original/smoke.tf rename to test/integration/hcl2_original/smoke.tf diff --git a/test/round_trip/hcl2_reconstructed/operator_precedence.tf b/test/integration/hcl2_reconstructed/operators.tf similarity index 100% rename from test/round_trip/hcl2_reconstructed/operator_precedence.tf rename to test/integration/hcl2_reconstructed/operators.tf diff --git a/test/round_trip/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf similarity index 100% rename from test/round_trip/hcl2_reconstructed/smoke.tf rename to test/integration/hcl2_reconstructed/smoke.tf diff --git a/test/round_trip/json_reserialized/operator_precedence.json b/test/integration/json_reserialized/operators.json similarity index 100% rename from test/round_trip/json_reserialized/operator_precedence.json rename to test/integration/json_reserialized/operators.json diff --git a/test/round_trip/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json similarity index 100% rename from test/round_trip/json_reserialized/smoke.json rename to test/integration/json_reserialized/smoke.json diff --git a/test/round_trip/json_serialized/operator_precedence.json b/test/integration/json_serialized/operators.json similarity index 100% rename from test/round_trip/json_serialized/operator_precedence.json rename to test/integration/json_serialized/operators.json diff --git a/test/round_trip/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json similarity index 100% rename from test/round_trip/json_serialized/smoke.json rename to test/integration/json_serialized/smoke.json diff --git a/test/integration/specialized/builder_basic.json b/test/integration/specialized/builder_basic.json new file mode 100644 index 00000000..da62720b --- /dev/null +++ 
b/test/integration/specialized/builder_basic.json @@ -0,0 +1,63 @@ +{ + "__is_block__": true, + "resource": [ + { + "aws_instance": { + "web": { + "__is_block__": true, + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2 + } + } + }, + { + "aws_s3_bucket": { + "data": { + "__is_block__": true, + "bucket": "\"my-bucket\"", + "acl": "\"private\"" + } + } + }, + { + "aws_instance": { + "nested": { + "__is_block__": true, + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "__is_block__": true, + "command": "\"echo hello\"" + } + }, + { + "remote-exec": { + "__is_block__": true, + "inline": "[\"puppet apply\"]" + } + } + ] + } + } + } + ], + "variable": [ + { + "instance_type": { + "__is_block__": true, + "default": "\"t2.micro\"", + "description": "\"The instance type\"" + } + } + ], + "locals": [ + { + "__is_block__": true, + "port": 8080, + "enabled": true, + "name": "\"my-app\"" + } + ] +} diff --git a/test/integration/specialized/builder_basic.tf b/test/integration/specialized/builder_basic.tf new file mode 100644 index 00000000..b7ee2131 --- /dev/null +++ b/test/integration/specialized/builder_basic.tf @@ -0,0 +1,38 @@ +resource aws_instance web { + ami = "ami-12345" + instance_type = "t2.micro" + count = 2 +} + + +resource aws_s3_bucket data { + bucket = "my-bucket" + acl = "private" +} + + +resource aws_instance nested { + ami = "ami-99999" + + provisioner local-exec { + command = "echo hello" + } + + + provisioner remote-exec { + inline = ["puppet apply"] + } +} + + +variable instance_type { + default = "t2.micro" + description = "The instance type" +} + + +locals { + port = 8080 + enabled = true + name = "my-app" +} diff --git a/test/integration/specialized/builder_basic_reparsed.json b/test/integration/specialized/builder_basic_reparsed.json new file mode 100644 index 00000000..32e4954d --- /dev/null +++ b/test/integration/specialized/builder_basic_reparsed.json @@ -0,0 +1,64 @@ +{ + "resource": [ + { + "aws_instance": 
{ + "web": { + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2, + "__is_block__": true + } + } + }, + { + "aws_s3_bucket": { + "data": { + "bucket": "\"my-bucket\"", + "acl": "\"private\"", + "__is_block__": true + } + } + }, + { + "aws_instance": { + "nested": { + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "command": "\"echo hello\"", + "__is_block__": true + } + }, + { + "remote-exec": { + "inline": [ + "\"puppet apply\"" + ], + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + } + ], + "variable": [ + { + "instance_type": { + "default": "\"t2.micro\"", + "description": "\"The instance type\"", + "__is_block__": true + } + } + ], + "locals": [ + { + "port": 8080, + "enabled": "true", + "name": "\"my-app\"", + "__is_block__": true + } + ] +} diff --git a/test/integration/specialized/builder_basic_reserialized.json b/test/integration/specialized/builder_basic_reserialized.json new file mode 100644 index 00000000..364ef0c3 --- /dev/null +++ b/test/integration/specialized/builder_basic_reserialized.json @@ -0,0 +1,62 @@ +{ + "resource": [ + { + "aws_instance": { + "web": { + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2, + "__is_block__": true + } + } + }, + { + "aws_s3_bucket": { + "data": { + "bucket": "\"my-bucket\"", + "acl": "\"private\"", + "__is_block__": true + } + } + }, + { + "aws_instance": { + "nested": { + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "command": "\"echo hello\"", + "__is_block__": true + } + }, + { + "remote-exec": { + "inline": "[\"puppet apply\"]", + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + } + ], + "variable": [ + { + "instance_type": { + "default": "\"t2.micro\"", + "description": "\"The instance type\"", + "__is_block__": true + } + } + ], + "locals": [ + { + "port": 8080, + "enabled": "true", + "name": "\"my-app\"", + "__is_block__": true + } + ] +} diff --git 
a/test/round_trip/special/operator_precedence.json b/test/integration/specialized/operator_precedence.json similarity index 100% rename from test/round_trip/special/operator_precedence.json rename to test/integration/specialized/operator_precedence.json diff --git a/test/integration/specialized/operator_precedence.tf b/test/integration/specialized/operator_precedence.tf new file mode 100644 index 00000000..f8351161 --- /dev/null +++ b/test/integration/specialized/operator_precedence.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/round_trip/test_round_trip.py b/test/integration/test_round_trip.py similarity index 78% rename from test/round_trip/test_round_trip.py rename to test/integration/test_round_trip.py index 93fcd111..3d2bbbb0 100644 --- a/test/round_trip/test_round_trip.py +++ b/test/integration/test_round_trip.py @@ -1,7 +1,7 @@ """Round-trip tests for the HCL2 → JSON → HCL2 pipeline. -Every test starts from the source HCL files in test/round_trip/hcl2/ and -runs the pipeline forward from there, comparing actuals against expected +Every test starts from the source HCL files in test/integration/hcl2_original/ +and runs the pipeline forward from there, comparing actuals against expected outputs at each stage: 1. 
HCL → JSON serialization (parse + transform + serialize) @@ -16,23 +16,20 @@ from typing import List from unittest import TestCase -from hcl2 import parses +from hcl2.api import parses_to_tree from hcl2.deserializer import BaseDeserializer from hcl2.formatter import BaseFormatter from hcl2.reconstructor import HCLReconstructor from hcl2.transformer import RuleTransformer -from hcl2.utils import SerializationOptions -ROUND_TRIP_DIR = Path(__file__).absolute().parent -HCL2_ORIGINAL_DIR = ROUND_TRIP_DIR / "hcl2_original" - -SPECIAL_DIR = ROUND_TRIP_DIR / "special" +INTEGRATION_DIR = Path(__file__).absolute().parent +HCL2_ORIGINAL_DIR = INTEGRATION_DIR / "hcl2_original" _STEP_DIRS = { "hcl2_original": HCL2_ORIGINAL_DIR, - "hcl2_reconstructed": ROUND_TRIP_DIR / "hcl2_reconstructed", - "json_serialized": ROUND_TRIP_DIR / "json_serialized", - "json_reserialized": ROUND_TRIP_DIR / "json_reserialized", + "hcl2_reconstructed": INTEGRATION_DIR / "hcl2_reconstructed", + "json_serialized": INTEGRATION_DIR / "json_serialized", + "json_reserialized": INTEGRATION_DIR / "json_reserialized", } _STEP_SUFFIXES = { @@ -53,7 +50,7 @@ class SuiteStep(Enum): def _get_suites() -> List[str]: """ Get a list of the test suites. - Names of a test suite is a name of file in `test/round_trip/hcl2_original/` without the .tf suffix. + The name of a test suite is the name of a file in `test/integration/hcl2_original/` without the .tf suffix. Override SUITES to run a specific subset, e.g. SUITES = ["config"] """ @@ -63,7 +60,7 @@ def _get_suites() -> List[str]: # set this to arbitrary list of test suites to run, -# e.g. 
`SUITES = ["smoke"]` to run the tests only for `test/integration/hcl2_original/smoke.tf` SUITES: List[str] = [] @@ -72,9 +69,9 @@ def _get_suite_file(suite_name: str, step: SuiteStep) -> Path: return _STEP_DIRS[step.value] / (suite_name + _STEP_SUFFIXES[step.value]) -def _parse_and_serialize(hcl_text: str, options: SerializationOptions = None) -> dict: +def _parse_and_serialize(hcl_text: str, options=None) -> dict: """Parse HCL text and serialize to a Python dict.""" - parsed_tree = parses(hcl_text) + parsed_tree = parses_to_tree(hcl_text) rules = RuleTransformer().transform(parsed_tree) if options: return rules.serialize(options=options) @@ -192,24 +189,3 @@ def test_full_round_trip(self): f"Full round-trip mismatch for {suite}: " f"HCL → JSON → HCL → JSON did not produce identical JSON", ) - - -class TestOperatorPrecedence(TestCase): - """Test that parsed expressions correctly represent operator precedence. - - Serializes with force_operation_parentheses=True so that implicit - precedence becomes explicit parentheses in the output. - See: https://github.com/amplify-education/python-hcl2/issues/248 - """ - - maxDiff = None - _OPTIONS = SerializationOptions(force_operation_parentheses=True) - - def test_operator_precedence(self): - hcl_path = _get_suite_file("operator_precedence", SuiteStep.ORIGINAL) - json_path = SPECIAL_DIR / "operator_precedence.json" - - actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) - expected = json.loads(json_path.read_text()) - - self.assertEqual(actual, expected) diff --git a/test/integration/test_specialized.py b/test/integration/test_specialized.py new file mode 100644 index 00000000..d1b817e2 --- /dev/null +++ b/test/integration/test_specialized.py @@ -0,0 +1,77 @@ +"""Specialized integration tests for specific features and scenarios. 
+ +Unlike the suite-based round-trip tests, these target individual features +(operator precedence, Builder round-trip) with dedicated golden files +in test/integration/specialized/. +""" + +import json +from pathlib import Path +from unittest import TestCase + +from hcl2.utils import SerializationOptions + +from test.integration.test_round_trip import ( + _parse_and_serialize, + _deserialize_and_reserialize, + _deserialize_and_reconstruct, +) + +SPECIAL_DIR = Path(__file__).absolute().parent / "specialized" + + +class TestOperatorPrecedence(TestCase): + """Test that parsed expressions correctly represent operator precedence. + + Serializes with force_operation_parentheses=True so that implicit + precedence becomes explicit parentheses in the output. + See: https://github.com/amplify-education/python-hcl2/issues/248 + """ + + maxDiff = None + _OPTIONS = SerializationOptions(force_operation_parentheses=True) + + def test_operator_precedence(self): + hcl_path = SPECIAL_DIR / "operator_precedence.tf" + json_path = SPECIAL_DIR / "operator_precedence.json" + + actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) + expected = json.loads(json_path.read_text()) + + self.assertEqual(actual, expected) + + +class TestBuilderRoundTrip(TestCase): + """Test that dicts produced by Builder can be deserialized, reconstructed to + valid HCL, and reparsed back to equivalent dicts. 
+ + Pipeline: Builder.build() → from_dict → reconstruct → HCL text + HCL text → parse → serialize → dict (compare with expected) + """ + + maxDiff = None + + def _load_special(self, name, suffix): + return (SPECIAL_DIR / f"{name}{suffix}").read_text() + + def test_builder_reconstruction(self): + """Builder dict → deserialize → reconstruct → compare with expected HCL.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + actual_hcl = _deserialize_and_reconstruct(builder_dict) + expected_hcl = self._load_special("builder_basic", ".tf") + self.assertMultiLineEqual(actual_hcl, expected_hcl) + + def test_builder_full_round_trip(self): + """Builder dict → reconstruct → reparse → compare with expected JSON.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + reconstructed_hcl = _deserialize_and_reconstruct(builder_dict) + actual = _parse_and_serialize(reconstructed_hcl) + expected = json.loads(self._load_special("builder_basic_reparsed", ".json")) + self.assertEqual(actual, expected) + + def test_builder_reserialization(self): + """Builder dict → deserialize → reserialize → compare with expected dict.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + reserialized = _deserialize_and_reserialize(builder_dict) + expected = json.loads(self._load_special("builder_basic_reserialized", ".json")) + self.assertEqual(reserialized, expected) From e33b728b865800fa21d292e69c0bcf9fa633d22d Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:17:55 +0100 Subject: [PATCH 23/45] increase coverage failure threshold --- .coveragerc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.coveragerc b/.coveragerc index 30e6dc8c..1a959f1c 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,8 +5,8 @@ omit = hcl2/lark_parser.py hcl2/version.py hcl2/__init__.py - hcl2/rules/__init__.py + hcl2/rules/__init__.py [report] show_missing = true -fail_under = 80 +fail_under = 95 From 
df62cc9ba07f5cf3e92ea60e14935f6c0d531ffc Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:18:23 +0100 Subject: [PATCH 24/45] add CLAUDE.md --- CLAUDE.md | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..8d5260ab --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,226 @@ +# HCL2 Parser Development Guidelines + +When working with this HCL2 parser codebase, follow these architectural principles and patterns. + +## Core Architecture Rules + +**ALWAYS** understand the bidirectional pipeline: + +``` +Forward: HCL2 Text → Lark Parse Tree → LarkElement Tree → Python Dict/JSON +Reverse: Python Dict/JSON → LarkElement Tree → Lark Tree → HCL2 Text +``` + +**NEVER** bypass the LarkElement intermediate representation. It provides type safety and enables bidirectional transformations. + +**REMEMBER** that separation of concerns is key: + +- Grammar definition (`hcl2.lark`) — syntax rules +- Transformer (`transformer.py`) — Lark parse tree → LarkElement tree +- Serialization (`rules/*.serialize()`) — LarkElement tree → Python dict +- Deserializer (`deserializer.py`) — Python dict → LarkElement tree +- Formatter (`formatter.py`) — whitespace alignment and spacing on LarkElement trees +- Reconstructor (`reconstructor.py`) — LarkElement tree → HCL2 text via Lark + +### Public API Design + +**FOLLOW** the `json` module convention in `api.py`: + +- `load/loads` — HCL2 text → Python dict +- `dump/dumps` — Python dict → HCL2 text +- Intermediate stages for advanced usage: `parse/parses`, `parse_to_tree/parses_to_tree`, `transform`, `serialize`, `from_dict`, `from_json`, `reconstruct` +- All option parameters are keyword-only + +## Design Pattern Guidelines + +### Rule-Based Transformation Pattern + +**FOLLOW** the one-to-one mapping: each Lark grammar rule corresponds to exactly one `LarkRule` class. 
+ +**ENSURE** every rule class: + +- Mirrors lark grammar definition +- Inherits from appropriate base class (`LarkRule` or `LarkToken`) +- Implements `lark_name()` returning the grammar rule name +- Provides typed property accessors for child elements +- Handles its own serialization logic via `serialize()` +- Defines `_children` static field with appropriate type hinting + +**LOCATE** transformation logic in `hcl2/transformer.py` + +### Type Safety Requirements + +**USE** abstract base classes from `hcl2/rules/abstract.py` to define contracts. + +**PROVIDE** comprehensive type hints for all rule children structures. + +**LEVERAGE** the generic token system in `hcl2/rules/tokens.py` for dynamic token creation with caching. + +### Modular Organization Rules + +**ORGANIZE** rules by domain responsibility: + +- **Structural rules** → `rules/base.py` +- **Container rules** → `rules/containers.py` +- **Expression rules** → `rules/expressions.py` +- **Literal rules** → `rules/literal_rules.py` +- **String rules** → `rules/strings.py` +- **Function rules** → `rules/functions.py` +- **Indexing rules** → `rules/indexing.py` +- **For-expression rules** → `rules/for_expressions.py` +- **Metadata rules** → `rules/whitespace.py` + +**NEVER** mix concerns across these domains. 
+ +### Serialization Strategy Guidelines + +**IMPLEMENT** context-aware serialization using: + +- `SerializationOptions` for configuration +- `SerializationContext` for state tracking +- Context managers for temporary state changes + +**REFERENCE** implementation patterns in `hcl2/utils.py` + +**ENSURE** each rule type follows its serialization strategy: + +- Structural rules create nested dictionaries +- Container rules handle collections with optional wrapping +- Expression rules generate `${...}` interpolation when needed +- Literal rules convert to appropriate Python types + +## Critical Implementation Rules + +### Block vs Object Distinction + +**ALWAYS** preserve the semantic difference between HCL2 blocks and data objects. + +**USE** `__is_block__` markers to maintain semantic intent during round-trips. + +**IMPLEMENT** block recognition logic in deserializer that can distinguish blocks from regular objects. + +**HANDLE** multi-label blocks by implementing recursive label extraction algorithms. + +### Bidirectional Requirements + +**ENSURE** every serialization operation has a corresponding deserialization counterpart. + +**TEST** round-trip integrity: Parse → Serialize → Deserialize → Serialize should produce identical results. + +**REFERENCE** deserialization patterns in `hcl2/deserializer.py` + +### String Interpolation Handling + +**SUPPORT** nested expression evaluation within `${expression}` syntax. + +**HANDLE** escape sequences and literal text segments properly. + +**MAINTAIN** context awareness when generating interpolation strings. + +## Extension Guidelines + +### Adding New Language Constructs + +**FOLLOW** this exact sequence: + +1. Add grammar rules to `hcl2.lark` +1. Create rule classes following existing patterns +1. Add transformer methods to map grammar to rules +1. Implement serialization logic in rule classes +1. 
Update deserializer for round-trip support + +### Rule Implementation Conventions + +**ALWAYS** implement these methods/properties: + +- `lark_name()` static method +- Property accessors for child elements +- `serialize()` method with context support +- Type hints for `_children` structure + +**FOLLOW** naming conventions consistent with existing rules. + +### Testing Requirements + +**USE** `unittest.TestCase` as the test framework (not pytest). + +**ORGANIZE** tests into two directories: + +- `test/unit/` — granular tests that instantiate rule objects directly (no parsing) + - `test/unit/rules/` — one file per rules module (e.g., `test_expressions.py` covers `hcl2/rules/expressions.py`) + - `test/unit/test_api.py`, `test/unit/test_builder.py`, etc. — other module tests +- `test/integration/` — full-pipeline tests using golden files + - `test_round_trip.py` — suite-based step tests (HCL→JSON, JSON→JSON, JSON→HCL, full round-trip) that iterate over all suites in `hcl2_original/` + - `test_specialized.py` — feature-specific integration tests (operator precedence, Builder round-trip) with golden files in `specialized/` + +**USE** concrete stubs when testing ABCs (e.g., `StubExpression(ExpressionRule)` for testing `_wrap_into_parentheses` logic without the parser). + +**RUN** tests with: `python -m unittest discover -s test -p "test_*.py" -v` + +## Code Quality Rules + +### Type Safety Requirements + +**PROVIDE** full type hints to enable static analysis. + +**USE** proper inheritance hierarchies to catch errors at runtime. + +**IMPLEMENT** property-based access to prevent structural errors. + +### Performance Considerations + +**LEVERAGE** cached token creation to prevent duplicate instantiation. + +**IMPLEMENT** lazy evaluation for context-sensitive processing. + +**OPTIMIZE** tree traversal using parent-child references. + +### Maintainability Standards + +**ENSURE** each rule has single responsibility for one grammar construct. 
+ +**FOLLOW** open/closed principle: extend via new rules, don't modify existing ones. + +**MAINTAIN** clear import dependencies and type relationships. + +## File Organization Standards + +**KEEP** core abstractions in `rules/abstract.py` + +**GROUP** domain-specific rules by functionality in separate files + +**SEPARATE** utility functions into dedicated modules + +**MAINTAIN** grammar definition independence from implementation + +**STRUCTURE** test infrastructure to support incremental validation + +## Common Pitfalls to Avoid + +**DO NOT** create direct transformations from parse tree to Python dict - always use LarkElement intermediate representation. + +**DO NOT** mix serialization concerns across rule types - each rule handles its own format. + +**DO NOT** ignore context when generating expressions - interpolation behavior depends on nesting. + +**DO NOT** forget to update both serialization and deserialization when adding new constructs. + +**DO NOT** bypass the factory pattern for token creation - use the cached `StringToken` system. + +## When Making Changes + +**ALWAYS** run round-trip tests after any modifications. + +**VERIFY** that new rules follow existing patterns and conventions. + +**UPDATE** both transformer and deserializer when adding language features. + +**MAINTAIN** type safety and proper inheritance relationships. + +**DOCUMENT** any new patterns or conventions introduced. + +This architecture enables robust HCL2 parsing with full round-trip fidelity while maintaining code quality and extensibility. + +## Keeping This File Current + +**PROACTIVELY** update this file when your work changes the architecture, file organization, module responsibilities, public API surface, or testing conventions described above. If you add, rename, move, or delete modules, rules files, test directories, or pipeline stages — reflect those changes here before finishing the task. Stale documentation is worse than no documentation. 
From cf33fb3a05cd67c09607904f2f5ba798e6c1e2e2 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:17:55 +0100 Subject: [PATCH 25/45] increase coverage failure threshold --- .coveragerc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.coveragerc b/.coveragerc index 30e6dc8c..3907df05 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,8 +5,8 @@ omit = hcl2/lark_parser.py hcl2/version.py hcl2/__init__.py - hcl2/rules/__init__.py + hcl2/rules/__init__.py [report] show_missing = true -fail_under = 80 +fail_under = 90 From 020d141cbb7619c32ebd46b7b30d8ed26c813aed Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 13:51:12 +0100 Subject: [PATCH 26/45] migrate some of existing round-trip tests to the new style, fix some related bugs --- hcl2/deserializer.py | 15 ++++++--- hcl2/hcl2.lark | 3 +- hcl2/rules/containers.py | 6 +++- hcl2/transformer.py | 3 ++ .../hcl2_original/floats.tf} | 0 .../hcl2_original}/nulls.tf | 0 test/integration/hcl2_original/object_keys.tf | 8 +++++ .../resource_keyword_attribute.tf | 8 +++++ test/integration/hcl2_original/smoke.tf | 11 ------- .../hcl2_original}/string_interpolations.tf | 6 ++-- .../hcl2_original}/unicode_strings.tf | 0 test/integration/hcl2_reconstructed/floats.tf | 26 ++++++++++++++++ test/integration/hcl2_reconstructed/nulls.tf | 11 +++++++ .../hcl2_reconstructed/object_keys.tf | 8 +++++ .../resource_keyword_attribute.tf | 8 +++++ test/integration/hcl2_reconstructed/smoke.tf | 11 ------- .../string_interpolations.tf | 9 ++++++ .../hcl2_reconstructed/unicode_strings.tf | 21 +++++++++++++ .../integration/json_reserialized/floats.json | 31 +++++++++++++++++++ test/integration/json_reserialized/nulls.json | 13 ++++++++ .../json_reserialized/object_keys.json | 10 ++++++ .../resource_keyword_attribute.json | 17 ++++++++++ test/integration/json_reserialized/smoke.json | 14 --------- .../string_interpolations.json | 18 +++++++++++ .../json_reserialized/unicode_strings.json | 21 
+++++++++++++ test/integration/json_serialized/floats.json | 31 +++++++++++++++++++ test/integration/json_serialized/nulls.json | 13 ++++++++ .../json_serialized/object_keys.json | 10 ++++++ .../resource_keyword_attribute.json | 17 ++++++++++ test/integration/json_serialized/smoke.json | 14 --------- .../string_interpolations.json | 18 +++++++++++ .../json_serialized/unicode_strings.json | 21 +++++++++++++ test/unit/rules/test_containers.py | 4 +-- 33 files changed, 344 insertions(+), 62 deletions(-) rename test/{helpers/terraform-config/test_floats.tf => integration/hcl2_original/floats.tf} (100%) rename test/{helpers/terraform-config => integration/hcl2_original}/nulls.tf (100%) create mode 100644 test/integration/hcl2_original/object_keys.tf create mode 100644 test/integration/hcl2_original/resource_keyword_attribute.tf rename test/{helpers/terraform-config => integration/hcl2_original}/string_interpolations.tf (68%) rename test/{helpers/terraform-config => integration/hcl2_original}/unicode_strings.tf (100%) create mode 100644 test/integration/hcl2_reconstructed/floats.tf create mode 100644 test/integration/hcl2_reconstructed/nulls.tf create mode 100644 test/integration/hcl2_reconstructed/object_keys.tf create mode 100644 test/integration/hcl2_reconstructed/resource_keyword_attribute.tf create mode 100644 test/integration/hcl2_reconstructed/string_interpolations.tf create mode 100644 test/integration/hcl2_reconstructed/unicode_strings.tf create mode 100644 test/integration/json_reserialized/floats.json create mode 100644 test/integration/json_reserialized/nulls.json create mode 100644 test/integration/json_reserialized/object_keys.json create mode 100644 test/integration/json_reserialized/resource_keyword_attribute.json create mode 100644 test/integration/json_reserialized/string_interpolations.json create mode 100644 test/integration/json_reserialized/unicode_strings.json create mode 100644 test/integration/json_serialized/floats.json create mode 100644 
test/integration/json_serialized/nulls.json create mode 100644 test/integration/json_serialized/object_keys.json create mode 100644 test/integration/json_serialized/resource_keyword_attribute.json create mode 100644 test/integration/json_serialized/string_interpolations.json create mode 100644 test/integration/json_serialized/unicode_strings.json diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 0ca91b48..a1f9733e 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -88,7 +88,12 @@ def _transformer(self) -> RuleTransformer: return RuleTransformer() def load_python(self, value: Any) -> LarkElement: - result = StartRule([self._deserialize(value)]) + if isinstance(value, dict): + # Top-level dict is always a body (attributes + blocks), not an object + children = self._deserialize_block_elements(value) + result = StartRule([BodyRule(children)]) + else: + result = StartRule([self._deserialize(value)]) return result def loads(self, value: str) -> LarkElement: @@ -286,7 +291,7 @@ def _deserialize_object(self, value: dict) -> ObjectRule: return ObjectRule([LBRACE(), *children, RBRACE()]) - def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: + def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: if self._is_expression(key): key = ObjectElemKeyExpressionRule( [ @@ -295,7 +300,7 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: if child is not None ] ) - elif "." in key: + elif isinstance(key, str) and "." 
in key: parts = key.split(".") children = [] for part in parts: @@ -313,8 +318,8 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: return ObjectElemRule(result) - def _is_expression(self, value: str) -> bool: - return value.startswith("${") and value.endswith("}") + def _is_expression(self, value: Any) -> bool: + return isinstance(value, str) and value.startswith("${") and value.endswith("}") def _is_block(self, value: Any) -> bool: """Simple check: if it's a list containing dicts with IS_BLOCK markers""" diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index 63154efb..4a9f1ec6 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -81,7 +81,8 @@ start : body // Body and basic constructs body : (new_line_or_comment? (attribute | block))* new_line_or_comment? -attribute : identifier EQ expression +attribute : _attribute_name EQ expression +_attribute_name : identifier | keyword block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE // Whitespace and comments diff --git a/hcl2/rules/containers.py b/hcl2/rules/containers.py index 4d7310c8..3f590c5c 100644 --- a/hcl2/rules/containers.py +++ b/hcl2/rules/containers.py @@ -96,7 +96,11 @@ def value(self) -> key_T: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return self.value.serialize(options, context) + result = self.value.serialize(options, context) + # Object keys must be strings for JSON compatibility + if isinstance(result, (int, float)): + result = str(result) + return result class ObjectElemKeyExpressionRule(LarkRule): diff --git a/hcl2/transformer.py b/hcl2/transformer.py index 07230fe5..7de4f7e1 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -108,6 +108,9 @@ def block(self, meta: Meta, args) -> BlockRule: @v_args(meta=True) def attribute(self, meta: Meta, args) -> AttributeRule: + # _attribute_name is flattened, so args[0] may be KeywordRule or IdentifierRule + if isinstance(args[0], KeywordRule): + 
args[0] = IdentifierRule([NAME(args[0].token.value)], meta) return AttributeRule(args, meta) @v_args(meta=True) diff --git a/test/helpers/terraform-config/test_floats.tf b/test/integration/hcl2_original/floats.tf similarity index 100% rename from test/helpers/terraform-config/test_floats.tf rename to test/integration/hcl2_original/floats.tf diff --git a/test/helpers/terraform-config/nulls.tf b/test/integration/hcl2_original/nulls.tf similarity index 100% rename from test/helpers/terraform-config/nulls.tf rename to test/integration/hcl2_original/nulls.tf diff --git a/test/integration/hcl2_original/object_keys.tf b/test/integration/hcl2_original/object_keys.tf new file mode 100644 index 00000000..913d5a42 --- /dev/null +++ b/test/integration/hcl2_original/object_keys.tf @@ -0,0 +1,8 @@ +bar = { + 0: 0, + "foo": 1 + baz : 2, + (var.account) : 3 + (format("key_prefix_%s", local.foo)) : 4 + "prefix_${var.account}:${var.user}_suffix": 5, +} diff --git a/test/integration/hcl2_original/resource_keyword_attribute.tf b/test/integration/hcl2_original/resource_keyword_attribute.tf new file mode 100644 index 00000000..fca27d75 --- /dev/null +++ b/test/integration/hcl2_original/resource_keyword_attribute.tf @@ -0,0 +1,8 @@ +resource "custom_provider_resource" "resource_name" { + name = "resource_name" + attribute = "attribute_value" + if = "attribute_value2" + in = "attribute_value3" + for = "attribute_value4" + for_each = "attribute_value5" +} diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index d741a6ac..99537532 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -43,17 +43,6 @@ block label1 label2 { } } -block label1 label3 { - simple_interpolation = "prefix:${var}-suffix" - embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" - deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" - 
escaped_interpolation = "prefix:$${aws:username}-suffix" - simple_and_escaped = "${"bar"}$${baz:bat}" - simple_and_escaped_reversed = "$${baz:bat}${"bar"}" - nested_escaped = "bar-${"$${baz:bat}"}" -} - - block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : diff --git a/test/helpers/terraform-config/string_interpolations.tf b/test/integration/hcl2_original/string_interpolations.tf similarity index 68% rename from test/helpers/terraform-config/string_interpolations.tf rename to test/integration/hcl2_original/string_interpolations.tf index 582b4aac..f9ac4e18 100644 --- a/test/helpers/terraform-config/string_interpolations.tf +++ b/test/integration/hcl2_original/string_interpolations.tf @@ -1,6 +1,6 @@ -locals { - simple_interpolation = "prefix:${var.foo}-suffix" - embedded_interpolation = "(long substring without interpolation); ${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo" +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" escaped_interpolation = "prefix:$${aws:username}-suffix" simple_and_escaped = "${"bar"}$${baz:bat}" diff --git a/test/helpers/terraform-config/unicode_strings.tf b/test/integration/hcl2_original/unicode_strings.tf similarity index 100% rename from test/helpers/terraform-config/unicode_strings.tf rename to test/integration/hcl2_original/unicode_strings.tf diff --git a/test/integration/hcl2_reconstructed/floats.tf b/test/integration/hcl2_reconstructed/floats.tf new file mode 100644 index 00000000..810108b2 --- /dev/null +++ b/test/integration/hcl2_reconstructed/floats.tf @@ -0,0 +1,26 @@ +locals { + simple_float = 123.456 + small_float = 0.123 + large_float = 9876543.21 + negative_float = -42.5 + negative_small = -0.001 + 
scientific_positive = 123000.0 + scientific_negative = 0.00987 + scientific_large = 6.022e+23 + integer_as_float = 100.0 + float_calculation = 10500.0 * 3.0 / 2.1 + float_comparison = 50.0 > 2.3 ? 1.0 : 0.0 + float_list = [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0, + ] + float_object = { + pi = 3.14159, + euler = 2.71828, + sqrt2 = 1.41421, + scientific = -12300.0, + } +} diff --git a/test/integration/hcl2_reconstructed/nulls.tf b/test/integration/hcl2_reconstructed/nulls.tf new file mode 100644 index 00000000..1e487789 --- /dev/null +++ b/test/integration/hcl2_reconstructed/nulls.tf @@ -0,0 +1,11 @@ +terraform = { + unary = !null, + binary = (a == null), + tuple = [ + null, + 1, + 2, + ], + single = null, + conditional = null ? null : null, +} diff --git a/test/integration/hcl2_reconstructed/object_keys.tf b/test/integration/hcl2_reconstructed/object_keys.tf new file mode 100644 index 00000000..497e65a6 --- /dev/null +++ b/test/integration/hcl2_reconstructed/object_keys.tf @@ -0,0 +1,8 @@ +bar = { + 0 = 0, + "foo" = 1, + baz = 2, + (var.account) = 3, + (format("key_prefix_%s", local.foo)) = 4, + "prefix_${var.account}:${var.user}_suffix" = 5, +} diff --git a/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf new file mode 100644 index 00000000..498777e0 --- /dev/null +++ b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf @@ -0,0 +1,8 @@ +resource"custom_provider_resource""resource_name" { + name = "resource_name" + attribute = "attribute_value" + if = "attribute_value2" + in = "attribute_value3" + for = "attribute_value4" + for_each = "attribute_value5" +} diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 8f17d6d6..b2de26f3 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -39,17 +39,6 @@ block label1 label2 { } -block label1 label3 { - 
simple_interpolation = "prefix:${var}-suffix" - embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" - deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" - escaped_interpolation = "prefix:$${aws:username}-suffix" - simple_and_escaped = "${"bar"}$${baz:bat}" - simple_and_escaped_reversed = "$${baz:bat}${"bar"}" - nested_escaped = "bar-${"$${baz:bat}"}" -} - - block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : diff --git a/test/integration/hcl2_reconstructed/string_interpolations.tf b/test/integration/hcl2_reconstructed/string_interpolations.tf new file mode 100644 index 00000000..73df4715 --- /dev/null +++ b/test/integration/hcl2_reconstructed/string_interpolations.tf @@ -0,0 +1,9 @@ +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" + escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" +} diff --git a/test/integration/hcl2_reconstructed/unicode_strings.tf b/test/integration/hcl2_reconstructed/unicode_strings.tf new file mode 100644 index 00000000..8c4df70e --- /dev/null +++ b/test/integration/hcl2_reconstructed/unicode_strings.tf @@ -0,0 +1,21 @@ +locals { + basic_unicode = "Hello, 世界! こんにちは Привет नमस्ते" + unicode_escapes = "© ♥ ♪ ☠ ☺" + emoji_string = "🚀 🌍 🔥 🎉" + rtl_text = "English and العربية text mixed" + complex_unicode = "Python (파이썬) es 很棒的! 
♥ αβγδ" + ascii = "ASCII: abc123" + emoji = "Emoji: 🚀🌍🔥🎉" + math = "Math: ∑∫√∞≠≤≥" + currency = "Currency: £€¥₹₽₩" + arrows = "Arrows: ←↑→↓↔↕" + cjk = "CJK: 你好世界안녕하세요こんにちは" + cyrillic = "Cyrillic: Привет мир" + special = "Special: ©®™§¶†‡" + mixed_content = <<-EOT + Line with interpolation: ${var.name} + Line with emoji: 👨‍👩‍👧‍👦 + Line with quotes: "quoted text" + Line with backslash: \escaped + EOT +} diff --git a/test/integration/json_reserialized/floats.json b/test/integration/json_reserialized/floats.json new file mode 100644 index 00000000..18078a18 --- /dev/null +++ b/test/integration/json_reserialized/floats.json @@ -0,0 +1,31 @@ +{ + "locals": [ + { + "simple_float": 123.456, + "small_float": 0.123, + "large_float": 9876543.21, + "negative_float": -42.5, + "negative_small": -0.001, + "scientific_positive": 123000.0, + "scientific_negative": 0.00987, + "scientific_large": 6.022e+23, + "integer_as_float": 100.0, + "float_calculation": "${10500.0 * 3.0 / 2.1}", + "float_comparison": "${50.0 > 2.3 ? 1.0 : 0.0}", + "float_list": [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0 + ], + "float_object": { + "pi": 3.14159, + "euler": 2.71828, + "sqrt2": 1.41421, + "scientific": -12300.0 + }, + "__is_block__": true + } + ] +} diff --git a/test/integration/json_reserialized/nulls.json b/test/integration/json_reserialized/nulls.json new file mode 100644 index 00000000..9cbdd755 --- /dev/null +++ b/test/integration/json_reserialized/nulls.json @@ -0,0 +1,13 @@ +{ + "terraform": { + "unary": "${!null}", + "binary": "${(a == null)}", + "tuple": [ + "null", + 1, + 2 + ], + "single": "null", + "conditional": "${null ? 
null : null}" + } +} diff --git a/test/integration/json_reserialized/object_keys.json b/test/integration/json_reserialized/object_keys.json new file mode 100644 index 00000000..8acccdea --- /dev/null +++ b/test/integration/json_reserialized/object_keys.json @@ -0,0 +1,10 @@ +{ + "bar": { + "0": 0, + "\"foo\"": 1, + "baz": 2, + "${(var.account)}": 3, + "${(format(\"key_prefix_%s\", local.foo))}": 4, + "\"prefix_${var.account}:${var.user}_suffix\"": 5 + } +} diff --git a/test/integration/json_reserialized/resource_keyword_attribute.json b/test/integration/json_reserialized/resource_keyword_attribute.json new file mode 100644 index 00000000..6826a0b8 --- /dev/null +++ b/test/integration/json_reserialized/resource_keyword_attribute.json @@ -0,0 +1,17 @@ +{ + "resource": [ + { + "\"custom_provider_resource\"": { + "\"resource_name\"": { + "name": "\"resource_name\"", + "attribute": "\"attribute_value\"", + "if": "\"attribute_value2\"", + "in": "\"attribute_value3\"", + "for": "\"attribute_value4\"", + "for_each": "\"attribute_value5\"", + "__is_block__": true + } + } + } + ] +} diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index 48544f85..670c5be3 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -48,20 +48,6 @@ } } }, - { - "label1": { - "label3": { - "simple_interpolation": "\"prefix:${var}-suffix\"", - "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", - "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", - "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", - "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", - "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", - "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", - "__is_block__": true - } - } - }, { "route53_forwarding_rule_shares": "${{for forwarding_rule_key in 
keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", "__is_block__": true diff --git a/test/integration/json_reserialized/string_interpolations.json b/test/integration/json_reserialized/string_interpolations.json new file mode 100644 index 00000000..059fcfbf --- /dev/null +++ b/test/integration/json_reserialized/string_interpolations.json @@ -0,0 +1,18 @@ +{ + "block": [ + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + } + ] +} \ No newline at end of file diff --git a/test/integration/json_reserialized/unicode_strings.json b/test/integration/json_reserialized/unicode_strings.json new file mode 100644 index 00000000..5f8f0095 --- /dev/null +++ b/test/integration/json_reserialized/unicode_strings.json @@ -0,0 +1,21 @@ +{ + "locals": [ + { + "basic_unicode": "\"Hello, \u4e16\u754c! \u3053\u3093\u306b\u3061\u306f \u041f\u0440\u0438\u0432\u0435\u0442 \u0928\u092e\u0938\u094d\u0924\u0947\"", + "unicode_escapes": "\"\u00a9 \u2665 \u266a \u2620 \u263a\"", + "emoji_string": "\"\ud83d\ude80 \ud83c\udf0d \ud83d\udd25 \ud83c\udf89\"", + "rtl_text": "\"English and \u0627\u0644\u0639\u0631\u0628\u064a\u0629 text mixed\"", + "complex_unicode": "\"Python (\ud30c\uc774\uc36c) es \u5f88\u68d2\u7684! 
\u2665 \u03b1\u03b2\u03b3\u03b4\"", + "ascii": "\"ASCII: abc123\"", + "emoji": "\"Emoji: \ud83d\ude80\ud83c\udf0d\ud83d\udd25\ud83c\udf89\"", + "math": "\"Math: \u2211\u222b\u221a\u221e\u2260\u2264\u2265\"", + "currency": "\"Currency: \u00a3\u20ac\u00a5\u20b9\u20bd\u20a9\"", + "arrows": "\"Arrows: \u2190\u2191\u2192\u2193\u2194\u2195\"", + "cjk": "\"CJK: \u4f60\u597d\u4e16\u754c\uc548\ub155\ud558\uc138\uc694\u3053\u3093\u306b\u3061\u306f\"", + "cyrillic": "\"Cyrillic: \u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440\"", + "special": "\"Special: \u00a9\u00ae\u2122\u00a7\u00b6\u2020\u2021\"", + "mixed_content": "\"<<-EOT\n Line with interpolation: ${var.name}\n Line with emoji: \ud83d\udc68\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66\n Line with quotes: \"quoted text\"\n Line with backslash: \\escaped\n EOT\"", + "__is_block__": true + } + ] +} diff --git a/test/integration/json_serialized/floats.json b/test/integration/json_serialized/floats.json new file mode 100644 index 00000000..18078a18 --- /dev/null +++ b/test/integration/json_serialized/floats.json @@ -0,0 +1,31 @@ +{ + "locals": [ + { + "simple_float": 123.456, + "small_float": 0.123, + "large_float": 9876543.21, + "negative_float": -42.5, + "negative_small": -0.001, + "scientific_positive": 123000.0, + "scientific_negative": 0.00987, + "scientific_large": 6.022e+23, + "integer_as_float": 100.0, + "float_calculation": "${10500.0 * 3.0 / 2.1}", + "float_comparison": "${50.0 > 2.3 ? 
1.0 : 0.0}", + "float_list": [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0 + ], + "float_object": { + "pi": 3.14159, + "euler": 2.71828, + "sqrt2": 1.41421, + "scientific": -12300.0 + }, + "__is_block__": true + } + ] +} diff --git a/test/integration/json_serialized/nulls.json b/test/integration/json_serialized/nulls.json new file mode 100644 index 00000000..9cbdd755 --- /dev/null +++ b/test/integration/json_serialized/nulls.json @@ -0,0 +1,13 @@ +{ + "terraform": { + "unary": "${!null}", + "binary": "${(a == null)}", + "tuple": [ + "null", + 1, + 2 + ], + "single": "null", + "conditional": "${null ? null : null}" + } +} diff --git a/test/integration/json_serialized/object_keys.json b/test/integration/json_serialized/object_keys.json new file mode 100644 index 00000000..8acccdea --- /dev/null +++ b/test/integration/json_serialized/object_keys.json @@ -0,0 +1,10 @@ +{ + "bar": { + "0": 0, + "\"foo\"": 1, + "baz": 2, + "${(var.account)}": 3, + "${(format(\"key_prefix_%s\", local.foo))}": 4, + "\"prefix_${var.account}:${var.user}_suffix\"": 5 + } +} diff --git a/test/integration/json_serialized/resource_keyword_attribute.json b/test/integration/json_serialized/resource_keyword_attribute.json new file mode 100644 index 00000000..6826a0b8 --- /dev/null +++ b/test/integration/json_serialized/resource_keyword_attribute.json @@ -0,0 +1,17 @@ +{ + "resource": [ + { + "\"custom_provider_resource\"": { + "\"resource_name\"": { + "name": "\"resource_name\"", + "attribute": "\"attribute_value\"", + "if": "\"attribute_value2\"", + "in": "\"attribute_value3\"", + "for": "\"attribute_value4\"", + "for_each": "\"attribute_value5\"", + "__is_block__": true + } + } + } + ] +} diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index 48544f85..670c5be3 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -48,20 +48,6 @@ } } }, - { - "label1": { - "label3": { - "simple_interpolation": 
"\"prefix:${var}-suffix\"", - "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", - "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", - "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", - "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", - "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", - "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", - "__is_block__": true - } - } - }, { "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", "__is_block__": true diff --git a/test/integration/json_serialized/string_interpolations.json b/test/integration/json_serialized/string_interpolations.json new file mode 100644 index 00000000..059fcfbf --- /dev/null +++ b/test/integration/json_serialized/string_interpolations.json @@ -0,0 +1,18 @@ +{ + "block": [ + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + } + ] +} \ No newline at end of file diff --git a/test/integration/json_serialized/unicode_strings.json b/test/integration/json_serialized/unicode_strings.json new file mode 100644 index 00000000..5f8f0095 --- 
/dev/null +++ b/test/integration/json_serialized/unicode_strings.json @@ -0,0 +1,21 @@ +{ + "locals": [ + { + "basic_unicode": "\"Hello, \u4e16\u754c! \u3053\u3093\u306b\u3061\u306f \u041f\u0440\u0438\u0432\u0435\u0442 \u0928\u092e\u0938\u094d\u0924\u0947\"", + "unicode_escapes": "\"\u00a9 \u2665 \u266a \u2620 \u263a\"", + "emoji_string": "\"\ud83d\ude80 \ud83c\udf0d \ud83d\udd25 \ud83c\udf89\"", + "rtl_text": "\"English and \u0627\u0644\u0639\u0631\u0628\u064a\u0629 text mixed\"", + "complex_unicode": "\"Python (\ud30c\uc774\uc36c) es \u5f88\u68d2\u7684! \u2665 \u03b1\u03b2\u03b3\u03b4\"", + "ascii": "\"ASCII: abc123\"", + "emoji": "\"Emoji: \ud83d\ude80\ud83c\udf0d\ud83d\udd25\ud83c\udf89\"", + "math": "\"Math: \u2211\u222b\u221a\u221e\u2260\u2264\u2265\"", + "currency": "\"Currency: \u00a3\u20ac\u00a5\u20b9\u20bd\u20a9\"", + "arrows": "\"Arrows: \u2190\u2191\u2192\u2193\u2194\u2195\"", + "cjk": "\"CJK: \u4f60\u597d\u4e16\u754c\uc548\ub155\ud558\uc138\uc694\u3053\u3093\u306b\u3061\u306f\"", + "cyrillic": "\"Cyrillic: \u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440\"", + "special": "\"Special: \u00a9\u00ae\u2122\u00a7\u00b6\u2020\u2021\"", + "mixed_content": "\"<<-EOT\n Line with interpolation: ${var.name}\n Line with emoji: \ud83d\udc68\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66\n Line with quotes: \"quoted text\"\n Line with backslash: \\escaped\n EOT\"", + "__is_block__": true + } + ] +} diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py index b49b3f38..5ae28df4 100644 --- a/test/unit/rules/test_containers.py +++ b/test/unit/rules/test_containers.py @@ -171,11 +171,11 @@ def test_serialize_identifier(self): def test_serialize_int_lit(self): rule = ObjectElemKeyRule([IntLitRule([IntLiteral("5")])]) - self.assertEqual(rule.serialize(), 5) + self.assertEqual(rule.serialize(), "5") def test_serialize_float_lit(self): rule = ObjectElemKeyRule([FloatLitRule([FloatLiteral("3.14")])]) - 
self.assertAlmostEqual(rule.serialize(), 3.14) + self.assertEqual(rule.serialize(), "3.14") def test_serialize_string(self): rule = ObjectElemKeyRule([_make_string_rule("k3")]) From 1ab1f0df96c356fc60e1d8152e9c3f9e784e6038 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 15:21:54 +0100 Subject: [PATCH 27/45] add unit tests for - hcl2/rules/base.py - hcl2/rules/for_expressions.py - hcl2/rules/functions.py fix some related bugs --- hcl2/rules/tokens.py | 2 +- test/integration/hcl2_reconstructed/smoke.tf | 2 +- test/integration/json_reserialized/smoke.json | 2 +- test/integration/json_serialized/smoke.json | 2 +- test/unit/rules/test_base.py | 299 +++++++++++++++ test/unit/rules/test_for_expressions.py | 351 ++++++++++++++++++ test/unit/rules/test_functions.py | 165 ++++++++ 7 files changed, 819 insertions(+), 4 deletions(-) create mode 100644 test/unit/rules/test_base.py create mode 100644 test/unit/rules/test_for_expressions.py create mode 100644 test/unit/rules/test_functions.py diff --git a/hcl2/rules/tokens.py b/hcl2/rules/tokens.py index b02be66e..06d1611f 100644 --- a/hcl2/rules/tokens.py +++ b/hcl2/rules/tokens.py @@ -36,7 +36,7 @@ def serialize_conversion(self) -> Callable[[Any], str]: return str -class StaticStringToken(LarkToken): +class StaticStringToken(StringToken): classes_by_value = {} diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index b2de26f3..ae687bdd 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -28,7 +28,7 @@ block label1 label2 { f(a), provider::func::aa(), ] - j = func(a, b, c, d) + j = func(a, b, c, d ... 
) k = a.b.5 l = a.*.b m = a[*][c].a.*.1 diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index 670c5be3..718086ce 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -32,7 +32,7 @@ "${f(a)}", "${provider::func::aa()}" ], - "j": "${func(a, b, c, d)}", + "j": "${func(a, b, c, d ...)}", "k": "${a.b.5}", "l": "${a.*.b}", "m": "${a[*][c].a.*.1}", diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index 670c5be3..718086ce 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -32,7 +32,7 @@ "${f(a)}", "${provider::func::aa()}" ], - "j": "${func(a, b, c, d)}", + "j": "${func(a, b, c, d ...)}", "k": "${a.b.5}", "l": "${a.*.b}", "m": "${a[*][c].a.*.1}", diff --git a/test/unit/rules/test_base.py b/test/unit/rules/test_base.py new file mode 100644 index 00000000..cfb6d666 --- /dev/null +++ b/test/unit/rules/test_base.py @@ -0,0 +1,299 @@ +from unittest import TestCase + +from hcl2.const import IS_BLOCK +from hcl2.rules.base import AttributeRule, BodyRule, StartRule, BlockRule +from hcl2.rules.expressions import ExpressionRule, ExprTermRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.strings import StringRule, StringPartRule +from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE, DBLQUOTE, STRING_CHARS, NL_OR_COMMENT +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & helpers --- + + +class StubExpression(ExpressionRule): + """Minimal concrete ExpressionRule that serializes to a fixed value.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_identifier(name): + return 
IdentifierRule([NAME(name)]) + + +def _make_expr_term(value): + return ExprTermRule([StubExpression(value)]) + + +def _make_string_rule(text): + part = StringPartRule([STRING_CHARS(text)]) + return StringRule([DBLQUOTE(), part, DBLQUOTE()]) + + +def _make_nlc(text): + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +def _make_attribute(name, value): + return AttributeRule([_make_identifier(name), EQ(), _make_expr_term(value)]) + + +def _make_block(labels, body_children=None): + """Build a BlockRule with the given labels and body children. + + labels: list of IdentifierRule or StringRule instances + body_children: list of children for the body, or None for empty body + """ + body = BodyRule(body_children or []) + children = list(labels) + [LBRACE(), body, RBRACE()] + return BlockRule(children) + + +# --- AttributeRule tests --- + + +class TestAttributeRule(TestCase): + def test_lark_name(self): + self.assertEqual(AttributeRule.lark_name(), "attribute") + + def test_identifier_property(self): + ident = _make_identifier("name") + attr = AttributeRule([ident, EQ(), _make_expr_term("value")]) + self.assertIs(attr.identifier, ident) + + def test_expression_property(self): + expr_term = _make_expr_term("value") + attr = AttributeRule([_make_identifier("name"), EQ(), expr_term]) + self.assertIs(attr.expression, expr_term) + + def test_serialize(self): + attr = _make_attribute("name", "value") + self.assertEqual(attr.serialize(), {"name": "value"}) + + def test_serialize_int_value(self): + attr = _make_attribute("count", 42) + self.assertEqual(attr.serialize(), {"count": 42}) + + def test_serialize_expression_value(self): + attr = _make_attribute("expr", "${var.x}") + self.assertEqual(attr.serialize(), {"expr": "${var.x}"}) + + +# --- BodyRule tests --- + + +class TestBodyRule(TestCase): + def test_lark_name(self): + self.assertEqual(BodyRule.lark_name(), "body") + + def test_serialize_empty(self): + body = BodyRule([]) + self.assertEqual(body.serialize(), {}) + + 
def test_serialize_single_attribute(self): + body = BodyRule([_make_attribute("name", "value")]) + self.assertEqual(body.serialize(), {"name": "value"}) + + def test_serialize_multiple_attributes(self): + body = BodyRule([_make_attribute("a", 1), _make_attribute("b", 2)]) + self.assertEqual(body.serialize(), {"a": 1, "b": 2}) + + def test_serialize_single_block(self): + block = _make_block([_make_identifier("resource")]) + body = BodyRule([block]) + result = body.serialize() + self.assertIn("resource", result) + self.assertIsInstance(result["resource"], list) + self.assertEqual(len(result["resource"]), 1) + self.assertTrue(result["resource"][0][IS_BLOCK]) + + def test_serialize_multiple_blocks_same_type(self): + block1 = _make_block( + [_make_identifier("resource")], + [_make_attribute("name", "first")], + ) + block2 = _make_block( + [_make_identifier("resource")], + [_make_attribute("name", "second")], + ) + body = BodyRule([block1, block2]) + result = body.serialize() + self.assertEqual(len(result["resource"]), 2) + self.assertEqual(result["resource"][0]["name"], "first") + self.assertEqual(result["resource"][1]["name"], "second") + + def test_serialize_mixed_attributes_and_blocks(self): + attr = _make_attribute("version", "1.0") + block = _make_block([_make_identifier("provider")]) + body = BodyRule([attr, block]) + result = body.serialize() + self.assertEqual(result["version"], "1.0") + self.assertIn("provider", result) + self.assertIsInstance(result["provider"], list) + + def test_serialize_comments_collected(self): + nlc = _make_nlc("# a comment\n") + attr = _make_attribute("x", 1) + body = BodyRule([nlc, attr]) + result = body.serialize(options=SerializationOptions(with_comments=True)) + self.assertIn("__comments__", result) + + def test_serialize_comments_not_collected_without_option(self): + nlc = _make_nlc("# a comment\n") + attr = _make_attribute("x", 1) + body = BodyRule([nlc, attr]) + result = 
body.serialize(options=SerializationOptions(with_comments=False)) + self.assertNotIn("__comments__", result) + + def test_serialize_bare_newlines_not_collected_as_comments(self): + nlc = _make_nlc("\n") + attr = _make_attribute("x", 1) + body = BodyRule([nlc, attr]) + result = body.serialize(options=SerializationOptions(with_comments=True)) + self.assertNotIn("__comments__", result) + + def test_serialize_skips_newline_children(self): + nlc = _make_nlc("\n") + attr = _make_attribute("x", 1) + body = BodyRule([nlc, attr, nlc]) + result = body.serialize() + # NLC children should not appear as keys + keys = [k for k in result.keys() if not k.startswith("__")] + self.assertEqual(keys, ["x"]) + + +# --- StartRule tests --- + + +class TestStartRule(TestCase): + def test_lark_name(self): + self.assertEqual(StartRule.lark_name(), "start") + + def test_body_property(self): + body = BodyRule([]) + start = StartRule([body]) + self.assertIs(start.body, body) + + def test_serialize_delegates_to_body(self): + attr = _make_attribute("key", "val") + body = BodyRule([attr]) + start = StartRule([body]) + self.assertEqual(start.serialize(), body.serialize()) + + def test_serialize_empty_body(self): + start = StartRule([BodyRule([])]) + self.assertEqual(start.serialize(), {}) + + +# --- BlockRule tests --- + + +class TestBlockRule(TestCase): + def test_lark_name(self): + self.assertEqual(BlockRule.lark_name(), "block") + + def test_labels_property_single(self): + ident = _make_identifier("resource") + block = _make_block([ident]) + self.assertEqual(len(block.labels), 1) + self.assertIs(block.labels[0], ident) + + def test_labels_property_two(self): + i1 = _make_identifier("resource") + i2 = _make_identifier("aws_instance") + block = _make_block([i1, i2]) + self.assertEqual(len(block.labels), 2) + self.assertIs(block.labels[0], i1) + self.assertIs(block.labels[1], i2) + + def test_labels_property_three(self): + i1 = _make_identifier("resource") + i2 = _make_identifier("aws_instance") + 
s3 = _make_string_rule("example") + block = _make_block([i1, i2, s3]) + labels = block.labels + self.assertEqual(len(labels), 3) + self.assertIs(labels[0], i1) + self.assertIs(labels[1], i2) + self.assertIs(labels[2], s3) + + def test_body_property(self): + body = BodyRule([]) + ident = _make_identifier("resource") + block = BlockRule([ident, LBRACE(), body, RBRACE()]) + self.assertIs(block.body, body) + + def test_constructor_filters_tokens(self): + """LBRACE and RBRACE should not appear in labels or body.""" + ident = _make_identifier("resource") + body = BodyRule([]) + block = BlockRule([ident, LBRACE(), body, RBRACE()]) + # labels should only contain the identifier + self.assertEqual(len(block.labels), 1) + self.assertIs(block.labels[0], ident) + self.assertIs(block.body, body) + + def test_serialize_single_label_empty_body(self): + block = _make_block([_make_identifier("resource")]) + result = block.serialize() + self.assertEqual(result, {IS_BLOCK: True}) + + def test_serialize_single_label_with_body(self): + block = _make_block( + [_make_identifier("resource")], + [_make_attribute("name", "foo")], + ) + result = block.serialize() + self.assertEqual(result, {"name": "foo", IS_BLOCK: True}) + + def test_serialize_two_labels(self): + block = _make_block( + [_make_identifier("resource"), _make_identifier("aws_instance")], + [_make_attribute("ami", "abc")], + ) + result = block.serialize() + self.assertIn("aws_instance", result) + inner = result["aws_instance"] + self.assertEqual(inner, {"ami": "abc", IS_BLOCK: True}) + + def test_serialize_three_labels(self): + block = _make_block( + [ + _make_identifier("resource"), + _make_identifier("aws_instance"), + _make_string_rule("example"), + ], + [_make_attribute("ami", "abc")], + ) + result = block.serialize() + self.assertIn("aws_instance", result) + inner = result["aws_instance"] + self.assertIn('"example"', inner) + innermost = inner['"example"'] + self.assertEqual(innermost, {"ami": "abc", IS_BLOCK: True}) + + def 
test_serialize_explicit_blocks_false(self): + block = _make_block( + [_make_identifier("resource")], + [_make_attribute("name", "foo")], + ) + opts = SerializationOptions(explicit_blocks=False) + result = block.serialize(options=opts) + self.assertNotIn(IS_BLOCK, result) + self.assertEqual(result, {"name": "foo"}) + + def test_serialize_string_label(self): + block = _make_block( + [_make_identifier("resource"), _make_string_rule("my_label")], + [_make_attribute("x", 1)], + ) + result = block.serialize() + # StringRule serializes with quotes + self.assertIn('"my_label"', result) diff --git a/test/unit/rules/test_for_expressions.py b/test/unit/rules/test_for_expressions.py new file mode 100644 index 00000000..febec643 --- /dev/null +++ b/test/unit/rules/test_for_expressions.py @@ -0,0 +1,351 @@ +from unittest import TestCase + +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.for_expressions import ( + ForIntroRule, + ForCondRule, + ForTupleExprRule, + ForObjectExprRule, +) +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import ( + NAME, + LSQB, + RSQB, + LBRACE, + RBRACE, + FOR, + IN, + IF, + COMMA, + COLON, + ELLIPSIS, + FOR_OBJECT_ARROW, +) +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & helpers --- + + +class StubExpression(ExpressionRule): + """Minimal concrete ExpressionRule that serializes to a fixed string.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_identifier(name): + return IdentifierRule([NAME(name)]) + + +def _make_for_intro_single(iter_name, iterable_value): + """Build ForIntroRule with a single iterator: for iter_name in iterable :""" + return ForIntroRule([ + FOR(), + _make_identifier(iter_name), + IN(), + StubExpression(iterable_value), + COLON(), + ]) + + +def _make_for_intro_dual(iter1_name, 
iter2_name, iterable_value): + """Build ForIntroRule with dual iterators: for iter1, iter2 in iterable :""" + return ForIntroRule([ + FOR(), + _make_identifier(iter1_name), + COMMA(), + _make_identifier(iter2_name), + IN(), + StubExpression(iterable_value), + COLON(), + ]) + + +def _make_for_cond(value): + """Build ForCondRule: if """ + return ForCondRule([IF(), StubExpression(value)]) + + +# --- ForIntroRule tests --- + + +class TestForIntroRule(TestCase): + def test_lark_name(self): + self.assertEqual(ForIntroRule.lark_name(), "for_intro") + + def test_first_iterator_single(self): + ident = _make_identifier("v") + rule = ForIntroRule([FOR(), ident, IN(), StubExpression("items"), COLON()]) + self.assertIs(rule.first_iterator, ident) + + def test_first_iterator_dual(self): + i1 = _make_identifier("k") + i2 = _make_identifier("v") + rule = ForIntroRule([FOR(), i1, COMMA(), i2, IN(), StubExpression("items"), COLON()]) + self.assertIs(rule.first_iterator, i1) + + def test_second_iterator_none_when_single(self): + rule = _make_for_intro_single("v", "items") + self.assertIsNone(rule.second_iterator) + + def test_second_iterator_present_when_dual(self): + i2 = _make_identifier("v") + rule = ForIntroRule([ + FOR(), + _make_identifier("k"), + COMMA(), + i2, + IN(), + StubExpression("items"), + COLON(), + ]) + self.assertIs(rule.second_iterator, i2) + + def test_iterable_property(self): + iterable = StubExpression("items") + rule = ForIntroRule([FOR(), _make_identifier("v"), IN(), iterable, COLON()]) + self.assertIs(rule.iterable, iterable) + + def test_serialize_single_iterator(self): + rule = _make_for_intro_single("v", "items") + self.assertEqual(rule.serialize(), "for v in items : ") + + def test_serialize_dual_iterator(self): + rule = _make_for_intro_dual("k", "v", "items") + self.assertEqual(rule.serialize(), "for k, v in items : ") + + def test_children_length(self): + rule = _make_for_intro_single("v", "items") + self.assertEqual(len(rule.children), 12) + + +# --- 
ForCondRule tests --- + + +class TestForCondRule(TestCase): + def test_lark_name(self): + self.assertEqual(ForCondRule.lark_name(), "for_cond") + + def test_condition_expr_property(self): + cond_expr = StubExpression("cond") + rule = ForCondRule([IF(), cond_expr]) + self.assertIs(rule.condition_expr, cond_expr) + + def test_serialize(self): + rule = _make_for_cond("cond") + self.assertEqual(rule.serialize(), "if cond") + + def test_children_length(self): + rule = _make_for_cond("cond") + self.assertEqual(len(rule.children), 3) + + +# --- ForTupleExprRule tests --- + + +class TestForTupleExprRule(TestCase): + def test_lark_name(self): + self.assertEqual(ForTupleExprRule.lark_name(), "for_tuple_expr") + + def test_for_intro_property(self): + intro = _make_for_intro_single("v", "items") + rule = ForTupleExprRule([LSQB(), intro, StubExpression("expr"), RSQB()]) + self.assertIs(rule.for_intro, intro) + + def test_value_expr_property(self): + value_expr = StubExpression("expr") + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + value_expr, + RSQB(), + ]) + self.assertIs(rule.value_expr, value_expr) + + def test_condition_none(self): + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ]) + self.assertIsNone(rule.condition) + + def test_condition_present(self): + cond = _make_for_cond("cond") + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + cond, + RSQB(), + ]) + self.assertIsInstance(rule.condition, ForCondRule) + self.assertIs(rule.condition, cond) + + def test_serialize_without_condition(self): + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ]) + self.assertEqual(rule.serialize(), "${[for v in items : expr]}") + + def test_serialize_with_condition(self): + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), 
+ _make_for_cond("cond"), + RSQB(), + ]) + self.assertEqual(rule.serialize(), "${[for v in items : expr if cond]}") + + def test_serialize_inside_dollar_string(self): + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ]) + ctx = SerializationContext(inside_dollar_string=True) + self.assertEqual(rule.serialize(context=ctx), "[for v in items : expr]") + + +# --- ForObjectExprRule tests --- + + +class TestForObjectExprRule(TestCase): + def test_lark_name(self): + self.assertEqual(ForObjectExprRule.lark_name(), "for_object_expr") + + def test_for_intro_property(self): + intro = _make_for_intro_dual("k", "v", "items") + rule = ForObjectExprRule([ + LBRACE(), + intro, + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ]) + self.assertIs(rule.for_intro, intro) + + def test_key_expr_property(self): + key_expr = StubExpression("key") + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + key_expr, + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ]) + self.assertIs(rule.key_expr, key_expr) + + def test_value_expr_property(self): + value_expr = StubExpression("value") + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + value_expr, + RBRACE(), + ]) + self.assertIs(rule.value_expr, value_expr) + + def test_ellipsis_none(self): + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ]) + self.assertIsNone(rule.ellipsis) + + def test_ellipsis_present(self): + ellipsis = ELLIPSIS() + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + ellipsis, + RBRACE(), + ]) + self.assertIs(rule.ellipsis, ellipsis) + + def test_condition_none(self): + 
rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ]) + self.assertIsNone(rule.condition) + + def test_condition_present(self): + cond = _make_for_cond("cond") + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + cond, + RBRACE(), + ]) + self.assertIsInstance(rule.condition, ForCondRule) + self.assertIs(rule.condition, cond) + + def test_serialize_basic(self): + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ]) + self.assertEqual(rule.serialize(), "${{for k, v in items : key => value}}") + + def test_serialize_with_ellipsis(self): + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + ELLIPSIS(), + RBRACE(), + ]) + result = rule.serialize() + self.assertIn("...", result) + self.assertEqual(result, "${{for k, v in items : key => value...}}") + + def test_serialize_with_condition(self): + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + _make_for_cond("cond"), + RBRACE(), + ]) + result = rule.serialize() + self.assertIn("if cond", result) + self.assertEqual(result, "${{for k, v in items : key => value if cond}}") diff --git a/test/unit/rules/test_functions.py b/test/unit/rules/test_functions.py new file mode 100644 index 00000000..188d5edd --- /dev/null +++ b/test/unit/rules/test_functions.py @@ -0,0 +1,165 @@ +from unittest import TestCase + +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.functions import ( + ArgumentsRule, + FunctionCallRule, + ProviderFunctionCallRule, +) +from 
hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import NAME, COMMA, ELLIPSIS, LPAR, RPAR +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & helpers --- + + +class StubExpression(ExpressionRule): + """Minimal concrete ExpressionRule that serializes to a fixed value.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_identifier(name): + return IdentifierRule([NAME(name)]) + + +def _make_arguments(values, ellipsis=False): + """Build an ArgumentsRule from a list of stub values. + + values: list of serialization values for StubExpression args + ellipsis: if True, append an ELLIPSIS token + """ + children = [] + for i, val in enumerate(values): + if i > 0: + children.append(COMMA()) + children.append(StubExpression(val)) + if ellipsis: + children.append(ELLIPSIS()) + return ArgumentsRule(children) + + +def _make_function_call(func_names, arg_values=None, ellipsis=False): + """Build a FunctionCallRule. + + func_names: list of identifier name strings (e.g. 
["func"] or ["ns", "mod", "func"]) + arg_values: optional list of stub values for arguments + ellipsis: if True, pass ellipsis to arguments + """ + children = [_make_identifier(name) for name in func_names] + children.append(LPAR()) + if arg_values is not None: + children.append(_make_arguments(arg_values, ellipsis)) + children.append(RPAR()) + return FunctionCallRule(children) + + +# --- ArgumentsRule tests --- + + +class TestArgumentsRule(TestCase): + def test_lark_name(self): + self.assertEqual(ArgumentsRule.lark_name(), "arguments") + + def test_has_ellipsis_false(self): + rule = _make_arguments(["a"]) + self.assertFalse(rule.has_ellipsis) + + def test_has_ellipsis_true(self): + rule = _make_arguments(["a", "b"], ellipsis=True) + self.assertTrue(rule.has_ellipsis) + + def test_arguments_single(self): + rule = _make_arguments(["a"]) + self.assertEqual(len(rule.arguments), 1) + + def test_arguments_multiple(self): + rule = _make_arguments(["a", "b", "c"]) + self.assertEqual(len(rule.arguments), 3) + + def test_serialize_single_arg(self): + rule = _make_arguments(["a"]) + self.assertEqual(rule.serialize(), "a") + + def test_serialize_with_ellipsis(self): + rule = _make_arguments(["a", "b"], ellipsis=True) + self.assertEqual(rule.serialize(), "a, b ...") + + +# --- FunctionCallRule tests --- + + +class TestFunctionCallRule(TestCase): + def test_lark_name(self): + self.assertEqual(FunctionCallRule.lark_name(), "function_call") + + def test_identifiers_single(self): + rule = _make_function_call(["func"]) + self.assertEqual(len(rule.identifiers), 1) + + def test_identifiers_multiple(self): + rule = _make_function_call(["ns", "mod", "func"]) + self.assertEqual(len(rule.identifiers), 3) + + def test_arguments_property_present(self): + rule = _make_function_call(["func"], ["a"]) + self.assertIsInstance(rule.arguments, ArgumentsRule) + + def test_arguments_property_none(self): + rule = _make_function_call(["func"]) + self.assertIsNone(rule.arguments) + + def 
test_serialize_simple_no_args(self): + rule = _make_function_call(["func"]) + self.assertEqual(rule.serialize(), "${func()}") + + def test_serialize_simple_with_args(self): + rule = _make_function_call(["func"], ["a", "b"]) + self.assertEqual(rule.serialize(), "${func(a, b)}") + + def test_serialize_inside_dollar_string(self): + rule = _make_function_call(["func"], ["a"]) + ctx = SerializationContext(inside_dollar_string=True) + self.assertEqual(rule.serialize(context=ctx), "func(a)") + + +# --- ProviderFunctionCallRule tests --- + + +class TestProviderFunctionCallRule(TestCase): + def test_lark_name(self): + self.assertEqual(ProviderFunctionCallRule.lark_name(), "provider_function_call") + + def test_inherits_function_call_rule(self): + self.assertTrue(issubclass(ProviderFunctionCallRule, FunctionCallRule)) + + def test_serialize_provider_function(self): + children = [ + _make_identifier("ns"), + _make_identifier("mod"), + _make_identifier("func"), + LPAR(), + _make_arguments(["a"]), + RPAR(), + ] + rule = ProviderFunctionCallRule(children) + self.assertEqual(rule.serialize(), "${ns::mod::func(a)}") + + def test_serialize_inside_dollar_string(self): + children = [ + _make_identifier("ns"), + _make_identifier("mod"), + _make_identifier("func"), + LPAR(), + _make_arguments(["a"]), + RPAR(), + ] + rule = ProviderFunctionCallRule(children) + ctx = SerializationContext(inside_dollar_string=True) + self.assertEqual(rule.serialize(context=ctx), "ns::mod::func(a)") From 0a6b99684e5d338eba718592a2acc2d485c9539a Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 16:41:49 +0100 Subject: [PATCH 28/45] exclude abstract methods from test coverage report --- .coveragerc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.coveragerc b/.coveragerc index 3907df05..b40d5c58 100644 --- a/.coveragerc +++ b/.coveragerc @@ -10,3 +10,5 @@ omit = [report] show_missing = true fail_under = 90 +exclude_lines = + raise NotImplementedError From 
be1e4f13b999aa19ccdaab3b43758fed020693a2 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 16:43:33 +0100 Subject: [PATCH 29/45] fix scientific notation preservation, function argument lookup during serialization, and block label spacing in reconstruction --- hcl2/reconstructor.py | 6 +++- hcl2/rules/functions.py | 2 +- hcl2/rules/literal_rules.py | 14 +++++++- hcl2/utils.py | 1 + test/integration/hcl2_reconstructed/floats.tf | 12 +++---- .../resource_keyword_attribute.tf | 2 +- test/integration/hcl2_reconstructed/smoke.tf | 2 +- .../integration/json_reserialized/floats.json | 16 +++++----- test/integration/json_reserialized/smoke.json | 2 +- test/integration/json_serialized/floats.json | 16 +++++----- test/integration/json_serialized/smoke.json | 2 +- test/unit/rules/test_functions.py | 19 ++++++++++- test/unit/rules/test_literal_rules.py | 32 ++++++++++++++++++- 13 files changed, 95 insertions(+), 31 deletions(-) diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 1b5260ac..abfc21f6 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -156,6 +156,10 @@ def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[st result = [] rule_name = tree.data + # Check spacing BEFORE processing children, while _last_rule_name + # still reflects the previous sibling (not a child of this tree). 
+ needs_space = self._should_add_space_before(tree, parent_rule_name) + if rule_name == UnaryOpRule.lark_name(): for i, child in enumerate(tree.children): result.extend(self._reconstruct_node(child, rule_name)) @@ -183,7 +187,7 @@ def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[st for child in tree.children: result.extend(self._reconstruct_node(child, rule_name)) - if self._should_add_space_before(tree, parent_rule_name): + if needs_space: result.insert(0, " ") # Update state tracking diff --git a/hcl2/rules/functions.py b/hcl2/rules/functions.py index 380b959b..49b20f65 100644 --- a/hcl2/rules/functions.py +++ b/hcl2/rules/functions.py @@ -82,7 +82,7 @@ def identifiers(self) -> List[IdentifierRule]: @property @lru_cache(maxsize=None) def arguments(self) -> Optional[ArgumentsRule]: - for child in self._children[2:6]: + for child in self._children: if isinstance(child, ArgumentsRule): return child diff --git a/hcl2/rules/literal_rules.py b/hcl2/rules/literal_rules.py index 2e5b8281..cb8396b4 100644 --- a/hcl2/rules/literal_rules.py +++ b/hcl2/rules/literal_rules.py @@ -2,7 +2,7 @@ from typing import Any, Tuple from hcl2.rules.abstract import LarkRule, LarkToken -from hcl2.utils import SerializationOptions, SerializationContext +from hcl2.utils import SerializationOptions, SerializationContext, to_dollar_string class TokenRule(LarkRule, ABC): @@ -42,6 +42,18 @@ class FloatLitRule(TokenRule): def lark_name() -> str: return "float_lit" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + value = self.token.value + # Scientific notation (e.g. 1.23e5) cannot survive a Python float() + # round-trip, so preserve it as a ${...} expression string. 
+ if options.preserve_scientific_notation and isinstance(value, str) and "e" in value.lower(): + if context.inside_dollar_string: + return value + return to_dollar_string(value) + return self.token.serialize() + class BinaryOperatorRule(TokenRule): @staticmethod diff --git a/hcl2/utils.py b/hcl2/utils.py index 68c32ebc..b15dda8a 100644 --- a/hcl2/utils.py +++ b/hcl2/utils.py @@ -16,6 +16,7 @@ class SerializationOptions: explicit_blocks: bool = True preserve_heredocs: bool = True force_operation_parentheses: bool = False + preserve_scientific_notation: bool = True @dataclass diff --git a/test/integration/hcl2_reconstructed/floats.tf b/test/integration/hcl2_reconstructed/floats.tf index 810108b2..23dc46fe 100644 --- a/test/integration/hcl2_reconstructed/floats.tf +++ b/test/integration/hcl2_reconstructed/floats.tf @@ -4,23 +4,23 @@ locals { large_float = 9876543.21 negative_float = -42.5 negative_small = -0.001 - scientific_positive = 123000.0 - scientific_negative = 0.00987 + scientific_positive = 1.23e5 + scientific_negative = 9.87e-3 scientific_large = 6.022e+23 integer_as_float = 100.0 - float_calculation = 10500.0 * 3.0 / 2.1 - float_comparison = 50.0 > 2.3 ? 1.0 : 0.0 + float_calculation = 105e+2 * 3.0 / 2.1 + float_comparison = 5e1 > 2.3 ? 
1.0 : 0.0 float_list = [ 1.1, 2.2, 3.3, -4.4, - 550.0, + 5.5e2, ] float_object = { pi = 3.14159, euler = 2.71828, sqrt2 = 1.41421, - scientific = -12300.0, + scientific = -123e+2, } } diff --git a/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf index 498777e0..c9ada660 100644 --- a/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf +++ b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf @@ -1,4 +1,4 @@ -resource"custom_provider_resource""resource_name" { +resource "custom_provider_resource" "resource_name" { name = "resource_name" attribute = "attribute_value" if = "attribute_value2" diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index ae687bdd..40e2597d 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -26,7 +26,7 @@ block label1 label2 { 3, ], f(a), - provider::func::aa(), + provider::func::aa(5), ] j = func(a, b, c, d ... ) k = a.b.5 diff --git a/test/integration/json_reserialized/floats.json b/test/integration/json_reserialized/floats.json index 18078a18..8246516c 100644 --- a/test/integration/json_reserialized/floats.json +++ b/test/integration/json_reserialized/floats.json @@ -6,26 +6,26 @@ "large_float": 9876543.21, "negative_float": -42.5, "negative_small": -0.001, - "scientific_positive": 123000.0, - "scientific_negative": 0.00987, - "scientific_large": 6.022e+23, + "scientific_positive": "${1.23e5}", + "scientific_negative": "${9.87e-3}", + "scientific_large": "${6.022e+23}", "integer_as_float": 100.0, - "float_calculation": "${10500.0 * 3.0 / 2.1}", - "float_comparison": "${50.0 > 2.3 ? 1.0 : 0.0}", + "float_calculation": "${105e+2 * 3.0 / 2.1}", + "float_comparison": "${5e1 > 2.3 ? 
1.0 : 0.0}", "float_list": [ 1.1, 2.2, 3.3, -4.4, - 550.0 + "${5.5e2}" ], "float_object": { "pi": 3.14159, "euler": 2.71828, "sqrt2": 1.41421, - "scientific": -12300.0 + "scientific": "${-123e+2}" }, "__is_block__": true } ] -} +} \ No newline at end of file diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index 718086ce..5bcc702a 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -30,7 +30,7 @@ 3 ], "${f(a)}", - "${provider::func::aa()}" + "${provider::func::aa(5)}" ], "j": "${func(a, b, c, d ...)}", "k": "${a.b.5}", diff --git a/test/integration/json_serialized/floats.json b/test/integration/json_serialized/floats.json index 18078a18..8246516c 100644 --- a/test/integration/json_serialized/floats.json +++ b/test/integration/json_serialized/floats.json @@ -6,26 +6,26 @@ "large_float": 9876543.21, "negative_float": -42.5, "negative_small": -0.001, - "scientific_positive": 123000.0, - "scientific_negative": 0.00987, - "scientific_large": 6.022e+23, + "scientific_positive": "${1.23e5}", + "scientific_negative": "${9.87e-3}", + "scientific_large": "${6.022e+23}", "integer_as_float": 100.0, - "float_calculation": "${10500.0 * 3.0 / 2.1}", - "float_comparison": "${50.0 > 2.3 ? 1.0 : 0.0}", + "float_calculation": "${105e+2 * 3.0 / 2.1}", + "float_comparison": "${5e1 > 2.3 ? 
1.0 : 0.0}", "float_list": [ 1.1, 2.2, 3.3, -4.4, - 550.0 + "${5.5e2}" ], "float_object": { "pi": 3.14159, "euler": 2.71828, "sqrt2": 1.41421, - "scientific": -12300.0 + "scientific": "${-123e+2}" }, "__is_block__": true } ] -} +} \ No newline at end of file diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index 718086ce..5bcc702a 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -30,7 +30,7 @@ 3 ], "${f(a)}", - "${provider::func::aa()}" + "${provider::func::aa(5)}" ], "j": "${func(a, b, c, d ...)}", "k": "${a.b.5}", diff --git a/test/unit/rules/test_functions.py b/test/unit/rules/test_functions.py index 188d5edd..4c4b336e 100644 --- a/test/unit/rules/test_functions.py +++ b/test/unit/rules/test_functions.py @@ -7,7 +7,7 @@ ProviderFunctionCallRule, ) from hcl2.rules.literal_rules import IdentifierRule -from hcl2.rules.tokens import NAME, COMMA, ELLIPSIS, LPAR, RPAR +from hcl2.rules.tokens import NAME, COMMA, ELLIPSIS, LPAR, RPAR, StringToken from hcl2.utils import SerializationOptions, SerializationContext @@ -128,6 +128,23 @@ def test_serialize_inside_dollar_string(self): ctx = SerializationContext(inside_dollar_string=True) self.assertEqual(rule.serialize(context=ctx), "func(a)") + def test_arguments_with_colons_tokens(self): + """FunctionCallRule with COLONS tokens (provider syntax) should still find arguments.""" + COLONS = StringToken["COLONS"] + children = [ + _make_identifier("provider"), + COLONS("::"), + _make_identifier("func"), + COLONS("::"), + _make_identifier("aa"), + LPAR(), + _make_arguments([5]), + RPAR(), + ] + rule = FunctionCallRule(children) + self.assertIsNotNone(rule.arguments) + self.assertEqual(rule.serialize(), "${provider::func::aa(5)}") + # --- ProviderFunctionCallRule tests --- diff --git a/test/unit/rules/test_literal_rules.py b/test/unit/rules/test_literal_rules.py index f6b8b94c..f8513c21 100644 --- 
a/test/unit/rules/test_literal_rules.py +++ b/test/unit/rules/test_literal_rules.py @@ -1,7 +1,6 @@ from unittest import TestCase from hcl2.rules.literal_rules import ( - TokenRule, KeywordRule, IdentifierRule, IntLitRule, @@ -9,6 +8,7 @@ BinaryOperatorRule, ) from hcl2.rules.tokens import NAME, BINARY_OP, IntLiteral, FloatLiteral +from hcl2.utils import SerializationContext, SerializationOptions class TestKeywordRule(TestCase): @@ -60,6 +60,36 @@ def test_serialize_returns_float(self): self.assertAlmostEqual(result, 3.14) self.assertIsInstance(result, float) + def test_serialize_scientific_notation_as_dollar_string(self): + """Scientific notation is preserved as ${...} to survive dict round-trip.""" + rule = FloatLitRule([FloatLiteral("1.23e5")]) + self.assertEqual(rule.serialize(), "${1.23e5}") + + def test_serialize_scientific_negative_exponent(self): + rule = FloatLitRule([FloatLiteral("9.87e-3")]) + self.assertEqual(rule.serialize(), "${9.87e-3}") + + def test_serialize_scientific_inside_dollar_string(self): + """Inside a dollar string context, return raw value without wrapping.""" + rule = FloatLitRule([FloatLiteral("1.23e5")]) + ctx = SerializationContext(inside_dollar_string=True) + self.assertEqual(rule.serialize(context=ctx), "1.23e5") + + def test_serialize_regular_float_not_wrapped(self): + """Non-scientific floats should remain plain Python floats.""" + rule = FloatLitRule([FloatLiteral("123.456")]) + result = rule.serialize() + self.assertEqual(result, 123.456) + self.assertIsInstance(result, float) + + def test_serialize_scientific_disabled(self): + """With preserve_scientific_notation=False, returns plain float.""" + rule = FloatLitRule([FloatLiteral("1.23e5")]) + opts = SerializationOptions(preserve_scientific_notation=False) + result = rule.serialize(options=opts) + self.assertEqual(result, 123000.0) + self.assertIsInstance(result, float) + class TestBinaryOperatorRule(TestCase): def test_lark_name(self): From 
13ae15a99520541f14d175d3f0cb44433d41952c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 18:54:34 +0100 Subject: [PATCH 30/45] more robust heredocs serialization, add option to deserialize strings into heredocs --- hcl2/deserializer.py | 19 ++++- hcl2/rules/strings.py | 19 +++-- test/integration/specialized/heredocs.tf | 34 +++++++++ .../specialized/heredocs_flattened.json | 14 ++++ .../specialized/heredocs_preserved.json | 14 ++++ .../specialized/heredocs_restored.tf | 20 +++++ test/integration/test_specialized.py | 76 +++++++++++++++++++ test/unit/rules/test_strings.py | 70 ++++++++++++++++- 8 files changed, 256 insertions(+), 10 deletions(-) create mode 100644 test/integration/specialized/heredocs.tf create mode 100644 test/integration/specialized/heredocs_flattened.json create mode 100644 test/integration/specialized/heredocs_preserved.json create mode 100644 test/integration/specialized/heredocs_restored.tf diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index a1f9733e..167d21f9 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -1,4 +1,5 @@ import json +import re from abc import ABC, abstractmethod from dataclasses import dataclass from functools import cached_property @@ -62,7 +63,7 @@ @dataclass class DeserializerOptions: heredocs_to_strings: bool = False - indent_length: int = 2 + strings_to_heredocs: bool = False object_elements_colon: bool = False object_elements_trailing_comma: bool = True @@ -156,6 +157,11 @@ def _deserialize_text(self, value: Any) -> LarkRule: if match: return self._deserialize_heredoc(value[1:-1], False) + if self.options.strings_to_heredocs: + inner = value[1:-1] + if '\\n' in inner: + return self._deserialize_string_as_heredoc(inner) + return self._deserialize_string(value) if self._is_expression(value): @@ -212,6 +218,17 @@ def _deserialize_heredoc( return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)]) return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) + def 
_deserialize_string_as_heredoc(self, inner: str) -> HeredocTemplateRule: + """Convert a quoted string with escaped newlines back into a heredoc.""" + # Single-pass unescape: \\n → \n, \\" → ", \\\\ → \ + content = re.sub( + r'\\(n|"|\\)', + lambda m: '\n' if m.group(1) == 'n' else m.group(1), + inner, + ) + heredoc = f"< ExprTermRule: """Deserialize an expression string into an ExprTermRule.""" # instead of processing expression manually and trying to recognize what kind of expression it is, diff --git a/hcl2/rules/strings.py b/hcl2/rules/strings.py index 248ab173..b757f317 100644 --- a/hcl2/rules/strings.py +++ b/hcl2/rules/strings.py @@ -101,8 +101,10 @@ def serialize( match = HEREDOC_PATTERN.match(heredoc) if not match: raise RuntimeError(f"Invalid Heredoc token: {heredoc}") - heredoc = match.group(2) - + heredoc = match.group(2).rstrip(self._trim_chars) + heredoc = heredoc.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n') + return f'"{heredoc}"' + result = heredoc.rstrip(self._trim_chars) return f'"{result}"' @@ -131,16 +133,21 @@ def serialize( raise RuntimeError(f"Invalid Heredoc token: {heredoc}") heredoc = match.group(2) - heredoc = heredoc.rstrip(self._trim_chars) + heredoc = heredoc.rstrip(self._trim_chars) lines = heredoc.split("\n") - + # calculate the min number of leading spaces in each line min_spaces = sys.maxsize for line in lines: leading_spaces = len(line) - len(line.lstrip(" ")) min_spaces = min(min_spaces, leading_spaces) - + # trim off that number of leading spaces from each line lines = [line[min_spaces:] for line in lines] - return '"' + "\n".join(lines) + '"' + + if not options.preserve_heredocs: + lines = [line.replace('\\', '\\\\').replace('"', '\\"') for line in lines] + + sep = "\\n" if not options.preserve_heredocs else "\n" + return '"' + sep.join(lines) + '"' \ No newline at end of file diff --git a/test/integration/specialized/heredocs.tf b/test/integration/specialized/heredocs.tf new file mode 100644 index 
00000000..9fc16498 --- /dev/null +++ b/test/integration/specialized/heredocs.tf @@ -0,0 +1,34 @@ +locals { + simple = < str: + """Deserialize a Python dict and reconstruct HCL text with custom options.""" + deserializer = BaseDeserializer(deserializer_options) + formatter = BaseFormatter() + reconstructor = HCLReconstructor() + deserialized = deserializer.load_python(serialized) + formatter.format_tree(deserialized) + lark_tree = deserialized.to_lark() + return reconstructor.reconstruct(lark_tree) + + +class TestHeredocs(TestCase): + """Test heredoc serialization, flattening, restoration, and round-trips. + + Scenarios: + 1. HCL with heredocs → JSON (preserve_heredocs=True) + 2. HCL with heredocs → JSON (preserve_heredocs=False, newlines escaped) + 3. Flattened JSON → HCL (strings_to_heredocs=True restores multiline) + 4. Full round-trip: flatten → restore → reparse → reflatten matches + """ + + maxDiff = None + _FLATTEN_OPTIONS = SerializationOptions(preserve_heredocs=False) + + def _load_special(self, name, suffix): + return (SPECIAL_DIR / f"{name}{suffix}").read_text() + + def test_parse_preserves_heredocs(self): + """HCL → JSON with default options preserves heredoc markers.""" + hcl_text = self._load_special("heredocs", ".tf") + actual = _parse_and_serialize(hcl_text) + expected = json.loads(self._load_special("heredocs_preserved", ".json")) + self.assertEqual(actual, expected) + + def test_parse_flattens_heredocs(self): + """HCL → JSON with preserve_heredocs=False escapes newlines in quoted strings.""" + hcl_text = self._load_special("heredocs", ".tf") + actual = _parse_and_serialize(hcl_text, options=self._FLATTEN_OPTIONS) + expected = json.loads(self._load_special("heredocs_flattened", ".json")) + self.assertEqual(actual, expected) + + def test_flattened_to_hcl_restores_heredocs(self): + """Flattened JSON → HCL with strings_to_heredocs=True restores multiline heredocs.""" + flattened = json.loads(self._load_special("heredocs_flattened", ".json")) + d_opts = 
DeserializerOptions(strings_to_heredocs=True) + actual = _deserialize_and_reconstruct_with_options(flattened, d_opts) + expected = self._load_special("heredocs_restored", ".tf") + self.assertMultiLineEqual(actual, expected) + + def test_flatten_restore_round_trip(self): + """Flatten → restore → reparse → reflatten produces identical flattened JSON.""" + hcl_text = self._load_special("heredocs", ".tf") + + # Forward: HCL → flattened JSON + flattened = _parse_and_serialize(hcl_text, options=self._FLATTEN_OPTIONS) + + # Restore: flattened JSON → HCL with heredocs + d_opts = DeserializerOptions(strings_to_heredocs=True) + restored_hcl = _deserialize_and_reconstruct_with_options(flattened, d_opts) + + # Reflatten: restored HCL → flattened JSON + reflattened = _parse_and_serialize(restored_hcl, options=self._FLATTEN_OPTIONS) + + self.assertEqual( + reflattened, + flattened, + "Flatten → restore → reflatten did not produce identical JSON", + ) diff --git a/test/unit/rules/test_strings.py b/test/unit/rules/test_strings.py index 67fec075..e7fb28f1 100644 --- a/test/unit/rules/test_strings.py +++ b/test/unit/rules/test_strings.py @@ -191,7 +191,43 @@ def test_serialize_no_preserve_multiline(self): rule = HeredocTemplateRule([token]) opts = SerializationOptions(preserve_heredocs=False) result = rule.serialize(opts) - self.assertEqual(result, '"line1\nline2"') + self.assertEqual(result, '"line1\\nline2"') + + def test_serialize_no_preserve_escapes_quotes(self): + token = HEREDOC_TEMPLATE('< Date: Mon, 2 Mar 2026 16:54:16 +0100 Subject: [PATCH 31/45] CLI rework --- .coveragerc | 3 +- cli/__init__.py | 0 cli/hcl_to_json.py | 118 ++++++++++++++ cli/helpers.py | 92 +++++++++++ cli/json_to_hcl.py | 124 +++++++++++++++ hcl2/__main__.py | 107 +------------ pyproject.toml | 5 +- test/unit/cli/__init__.py | 0 test/unit/cli/test_hcl_to_json.py | 254 ++++++++++++++++++++++++++++++ test/unit/cli/test_helpers.py | 174 ++++++++++++++++++++ test/unit/cli/test_json_to_hcl.py | 156 
++++++++++++++++++ 11 files changed, 925 insertions(+), 108 deletions(-) create mode 100644 cli/__init__.py create mode 100644 cli/hcl_to_json.py create mode 100644 cli/helpers.py create mode 100644 cli/json_to_hcl.py create mode 100644 test/unit/cli/__init__.py create mode 100644 test/unit/cli/test_hcl_to_json.py create mode 100644 test/unit/cli/test_helpers.py create mode 100644 test/unit/cli/test_json_to_hcl.py diff --git a/.coveragerc b/.coveragerc index b40d5c58..558bc244 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,11 +1,12 @@ [run] branch = true omit = - hcl2/__main__.py hcl2/lark_parser.py hcl2/version.py + hcl2/__main__.py hcl2/__init__.py hcl2/rules/__init__.py + cli/__init__.py [report] show_missing = true diff --git a/cli/__init__.py b/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cli/hcl_to_json.py b/cli/hcl_to_json.py new file mode 100644 index 00000000..faa9fb33 --- /dev/null +++ b/cli/hcl_to_json.py @@ -0,0 +1,118 @@ +"""``hcl2tojson`` CLI entry point — convert HCL2 files to JSON.""" +import argparse +import json +import os +from typing import IO + +from hcl2 import load +from hcl2.utils import SerializationOptions +from hcl2.version import __version__ +from .helpers import HCL_SKIPPABLE, _convert_single_file, _convert_directory, _convert_stdin + + +def _hcl_to_json( + in_file: IO, out_file: IO, options: SerializationOptions, json_indent: int = None, +) -> None: + data = load(in_file, serialization_options=options) + json.dump(data, out_file, indent=json_indent) + + +def main(): + """The ``hcl2tojson`` console_scripts entry point.""" + parser = argparse.ArgumentParser( + description="Convert HCL2 files to JSON", + ) + parser.add_argument( + "-s", dest="skip", action="store_true", help="Skip un-parsable files" + ) + parser.add_argument( + "PATH", + help='The file or directory to convert (use "-" for stdin)', + ) + parser.add_argument( + "OUT_PATH", + nargs="?", + help="The path to write output to. 
Optional for single file (defaults to stdout)", + ) + parser.add_argument("--version", action="version", version=__version__) + + # SerializationOptions flags + parser.add_argument( + "--with-meta", + action="store_true", + help="Add meta parameters like __start_line__ and __end_line__", + ) + parser.add_argument( + "--with-comments", + action="store_true", + help="Include comments in the output", + ) + parser.add_argument( + "--wrap-objects", + action="store_true", + help="Wrap object values as an inline HCL2", + ) + parser.add_argument( + "--wrap-tuples", + action="store_true", + help="Wrap tuple values an inline HCL2", + ) + parser.add_argument( + "--no-explicit-blocks", + action="store_true", + help="Disable explicit block markers", + ) + parser.add_argument( + "--no-preserve-heredocs", + action="store_true", + help="Convert heredocs to plain strings", + ) + parser.add_argument( + "--force-parens", + action="store_true", + help="Force parentheses around all operations", + ) + parser.add_argument( + "--no-preserve-scientific", + action="store_true", + help="Convert scientific notation to standard floats", + ) + + # JSON output formatting + parser.add_argument( + "--json-indent", + type=int, + default=2, + metavar="N", + help="JSON indentation width (default: 2)", + ) + + args = parser.parse_args() + + options = SerializationOptions( + with_meta=args.with_meta, + with_comments=args.with_comments, + wrap_objects=args.wrap_objects, + wrap_tuples=args.wrap_tuples, + explicit_blocks=not args.no_explicit_blocks, + preserve_heredocs=not args.no_preserve_heredocs, + force_operation_parentheses=args.force_parens, + preserve_scientific_notation=not args.no_preserve_scientific, + ) + json_indent = args.json_indent + + def convert(in_file, out_file): + _hcl_to_json(in_file, out_file, options, json_indent=json_indent) + out_file.write("\n") + + if args.PATH == "-": + _convert_stdin(convert) + elif os.path.isfile(args.PATH): + _convert_single_file(args.PATH, args.OUT_PATH, 
convert, args.skip, HCL_SKIPPABLE) + elif os.path.isdir(args.PATH): + _convert_directory( + args.PATH, args.OUT_PATH, convert, args.skip, HCL_SKIPPABLE, + in_extensions={".tf", ".hcl"}, out_extension=".json", + ) + else: + raise RuntimeError("Invalid Path", args.PATH) diff --git a/cli/helpers.py b/cli/helpers.py new file mode 100644 index 00000000..bef6ba6f --- /dev/null +++ b/cli/helpers.py @@ -0,0 +1,92 @@ +"""Shared file-conversion helpers for the HCL2 CLI commands.""" +import json +import os +import sys +from typing import Callable, IO, Set, Tuple + +from lark import UnexpectedCharacters, UnexpectedToken + +# Exceptions that can be skipped when -s is passed +HCL_SKIPPABLE = (UnexpectedToken, UnexpectedCharacters, UnicodeDecodeError) +JSON_SKIPPABLE = (json.JSONDecodeError, UnicodeDecodeError) + + +def _convert_single_file( + in_path: str, + out_path: str, + convert_fn: Callable[[IO, IO], None], + skip: bool, + skippable: Tuple[type, ...], +) -> None: + with open(in_path, "r", encoding="utf-8") as in_file: + print(in_path, file=sys.stderr, flush=True) + if out_path is not None: + try: + with open(out_path, "w", encoding="utf-8") as out_file: + convert_fn(in_file, out_file) + except skippable: + if skip: + return + raise + else: + try: + convert_fn(in_file, sys.stdout) + sys.stdout.write("\n") + except skippable: + if skip: + return + raise + + +def _convert_directory( + in_path: str, + out_path: str, + convert_fn: Callable[[IO, IO], None], + skip: bool, + skippable: Tuple[type, ...], + in_extensions: Set[str], + out_extension: str, +) -> None: + if out_path is None: + raise RuntimeError("Positional OUT_PATH parameter shouldn't be empty") + if not os.path.exists(out_path): + os.mkdir(out_path) + + processed_files: set = set() + for current_dir, _, files in os.walk(in_path): + dir_prefix = os.path.commonpath([in_path, current_dir]) + relative_current_dir = os.path.relpath(current_dir, dir_prefix) + current_out_path = os.path.normpath( + os.path.join(out_path, 
relative_current_dir) + ) + if not os.path.exists(current_out_path): + os.mkdir(current_out_path) + for file_name in files: + _, ext = os.path.splitext(file_name) + if ext not in in_extensions: + continue + + in_file_path = os.path.join(current_dir, file_name) + out_file_path = os.path.join(current_out_path, file_name) + out_file_path = os.path.splitext(out_file_path)[0] + out_extension + + if in_file_path in processed_files or out_file_path in processed_files: + continue + + processed_files.add(in_file_path) + processed_files.add(out_file_path) + + with open(in_file_path, "r", encoding="utf-8") as in_file: + print(in_file_path, file=sys.stderr, flush=True) + try: + with open(out_file_path, "w", encoding="utf-8") as out_file: + convert_fn(in_file, out_file) + except skippable: + if skip: + continue + raise + + +def _convert_stdin(convert_fn: Callable[[IO, IO], None]) -> None: + convert_fn(sys.stdin, sys.stdout) + sys.stdout.write("\n") diff --git a/cli/json_to_hcl.py b/cli/json_to_hcl.py new file mode 100644 index 00000000..48ade7c0 --- /dev/null +++ b/cli/json_to_hcl.py @@ -0,0 +1,124 @@ +"""``jsontohcl2`` CLI entry point — convert JSON files to HCL2.""" +import argparse +import json +import os +from typing import IO + +from hcl2 import dump +from hcl2.deserializer import DeserializerOptions +from hcl2.formatter import FormatterOptions +from hcl2.version import __version__ +from .helpers import JSON_SKIPPABLE, _convert_single_file, _convert_directory, _convert_stdin + + +def _json_to_hcl( + in_file: IO, + out_file: IO, + d_opts: DeserializerOptions, + f_opts: FormatterOptions, +) -> None: + data = json.load(in_file) + dump(data, out_file, deserializer_options=d_opts, formatter_options=f_opts) + + +def main(): + """The ``jsontohcl2`` console_scripts entry point.""" + parser = argparse.ArgumentParser( + description="Convert JSON files to HCL2", + ) + parser.add_argument( + "-s", dest="skip", action="store_true", help="Skip un-parsable files" + ) + 
parser.add_argument( + "PATH", + help='The file or directory to convert (use "-" for stdin)', + ) + parser.add_argument( + "OUT_PATH", + nargs="?", + help="The path to write output to. Optional for single file (defaults to stdout)", + ) + parser.add_argument("--version", action="version", version=__version__) + + # DeserializerOptions flags + parser.add_argument( + "--colon-separator", + action="store_true", + help="Use colons instead of equals in object elements", + ) + parser.add_argument( + "--no-trailing-comma", + action="store_true", + help="Omit trailing commas in object elements", + ) + parser.add_argument( + "--heredocs-to-strings", + action="store_true", + help="Convert heredocs to plain strings", + ) + parser.add_argument( + "--strings-to-heredocs", + action="store_true", + help="Convert strings containing escaped newlines to heredocs", + ) + + # FormatterOptions flags + parser.add_argument( + "--indent", + type=int, + default=2, + metavar="N", + help="Indentation width (default: 2)", + ) + parser.add_argument( + "--no-open-empty-blocks", + action="store_true", + help="Collapse empty blocks to a single line", + ) + parser.add_argument( + "--no-open-empty-objects", + action="store_true", + help="Collapse empty objects to a single line", + ) + parser.add_argument( + "--open-empty-tuples", + action="store_true", + help="Expand empty tuples across multiple lines", + ) + parser.add_argument( + "--no-align", + action="store_true", + help="Disable vertical alignment of attributes and object elements", + ) + + args = parser.parse_args() + + d_opts = DeserializerOptions( + object_elements_colon=args.colon_separator, + object_elements_trailing_comma=not args.no_trailing_comma, + heredocs_to_strings=args.heredocs_to_strings, + strings_to_heredocs=args.strings_to_heredocs, + ) + f_opts = FormatterOptions( + indent_length=args.indent, + open_empty_blocks=not args.no_open_empty_blocks, + open_empty_objects=not args.no_open_empty_objects, + 
open_empty_tuples=args.open_empty_tuples, + vertically_align_attributes=not args.no_align, + vertically_align_object_elements=not args.no_align, + ) + + def convert(in_file, out_file): + _json_to_hcl(in_file, out_file, d_opts, f_opts) + + if args.PATH == "-": + _convert_stdin(convert) + elif os.path.isfile(args.PATH): + _convert_single_file(args.PATH, args.OUT_PATH, convert, args.skip, JSON_SKIPPABLE) + elif os.path.isdir(args.PATH): + _convert_directory( + args.PATH, args.OUT_PATH, convert, args.skip, JSON_SKIPPABLE, + in_extensions={".json"}, out_extension=".tf", + ) + else: + raise RuntimeError("Invalid Path", args.PATH) diff --git a/hcl2/__main__.py b/hcl2/__main__.py index f1a58938..7431bb13 100644 --- a/hcl2/__main__.py +++ b/hcl2/__main__.py @@ -1,108 +1,5 @@ -#!/usr/bin/env python -""" -This script recursively converts hcl2 files to json - -Usage: - hcl2tojson [-s] PATH [OUT_PATH] - -Options: - -s Skip un-parsable files - PATH The path to convert - OUT_PATH The path to write files to - --with-meta If set add meta parameters to the output_json like __start_line__ and __end_line__ -""" -import argparse -import json -import os -import sys - -from lark import UnexpectedCharacters, UnexpectedToken - -from . import load -from .utils import SerializationOptions -from .version import __version__ - - -def main(): - """The `console_scripts` entry point""" - - parser = argparse.ArgumentParser( - description="This script recursively converts hcl2 files to json" - ) - parser.add_argument( - "-s", dest="skip", action="store_true", help="Skip un-parsable files" - ) - parser.add_argument("PATH", help="The file or directory to convert") - parser.add_argument( - "OUT_PATH", - nargs="?", - help="The path where to write files to. Optional when parsing a single file. 
" - "Output is printed to stdout if OUT_PATH is blank", - ) - parser.add_argument("--version", action="version", version=__version__) - parser.add_argument( - "--with-meta", - action="store_true", - help="If set add meta parameters to the output_json like __start_line__ and __end_line__", - ) - - args = parser.parse_args() - - skippable_exceptions = (UnexpectedToken, UnexpectedCharacters, UnicodeDecodeError) - - if os.path.isfile(args.PATH): - with open(args.PATH, "r", encoding="utf-8") as in_file: - # pylint: disable=R1732 - out_file = ( - sys.stdout - if args.OUT_PATH is None - else open(args.OUT_PATH, "w", encoding="utf-8") - ) - print(args.PATH, file=sys.stderr, flush=True) - options = SerializationOptions(with_meta=True) if args.with_meta else None - json.dump(load(in_file, serialization_options=options), out_file) - if args.OUT_PATH is None: - out_file.write("\n") - out_file.close() - elif os.path.isdir(args.PATH): - processed_files = set() - if args.OUT_PATH is None: - raise RuntimeError("Positional OUT_PATH parameter shouldn't be empty") - if not os.path.exists(args.OUT_PATH): - os.mkdir(args.OUT_PATH) - for current_dir, _, files in os.walk(args.PATH): - dir_prefix = os.path.commonpath([args.PATH, current_dir]) - relative_current_dir = os.path.relpath(current_dir, dir_prefix) - current_out_path = os.path.normpath( - os.path.join(args.OUT_PATH, relative_current_dir) - ) - if not os.path.exists(current_out_path): - os.mkdir(current_out_path) - for file_name in files: - in_file_path = os.path.join(current_dir, file_name) - out_file_path = os.path.join(current_out_path, file_name) - out_file_path = os.path.splitext(out_file_path)[0] + ".json" - - # skip any files that we already processed or generated to avoid loops and file lock errors - if in_file_path in processed_files or out_file_path in processed_files: - continue - - processed_files.add(in_file_path) - processed_files.add(out_file_path) - - with open(in_file_path, "r", encoding="utf-8") as in_file: - 
print(in_file_path, file=sys.stderr, flush=True) - try: - parsed_data = load(in_file) - except skippable_exceptions: - if args.skip: - continue - raise - with open(out_file_path, "w", encoding="utf-8") as out_file: - json.dump(parsed_data, out_file) - else: - raise RuntimeError("Invalid Path", args.PATH) - +"""Allow ``python -m hcl2`` to run the hcl2tojson command.""" +from cli.hcl_to_json import main if __name__ == "__main__": main() diff --git a/pyproject.toml b/pyproject.toml index 4440461a..0a9e0254 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,10 +40,11 @@ content-type = "text/markdown" Homepage = "https://github.com/amplify-education/python-hcl2" [project.scripts] -hcl2tojson = "hcl2.__main__:main" +hcl2tojson = "cli.hcl_to_json:main" +jsontohcl2 = "cli.json_to_hcl:main" [tool.setuptools] -packages = ["hcl2"] +packages = ["hcl2", "hcl2.rules", "cli"] zip-safe = false include-package-data = true diff --git a/test/unit/cli/__init__.py b/test/unit/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/cli/test_hcl_to_json.py b/test/unit/cli/test_hcl_to_json.py new file mode 100644 index 00000000..67c8c48f --- /dev/null +++ b/test/unit/cli/test_hcl_to_json.py @@ -0,0 +1,254 @@ +import json +import os +import tempfile +from io import StringIO +from unittest import TestCase +from unittest.mock import patch + +from cli.hcl_to_json import main + + +SIMPLE_HCL = 'x = 1\n' +SIMPLE_JSON_DICT = {"x": 1} + + +def _write_file(path, content): + with open(path, "w", encoding="utf-8") as f: + f.write(content) + + +def _read_file(path): + with open(path, "r", encoding="utf-8") as f: + return f.read() + + +class TestHclToJson(TestCase): + + def test_single_file_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, SIMPLE_HCL) + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", hcl_path]): + with patch("sys.stdout", stdout): + main() + + result = 
json.loads(stdout.getvalue()) + self.assertEqual(result["x"], 1) + + def test_single_file_to_output(self): + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + out_path = os.path.join(tmpdir, "test.json") + _write_file(hcl_path, SIMPLE_HCL) + + with patch("sys.argv", ["hcl2tojson", hcl_path, out_path]): + main() + + result = json.loads(_read_file(out_path)) + self.assertEqual(result["x"], 1) + + def test_stdin(self): + stdout = StringIO() + stdin = StringIO(SIMPLE_HCL) + with patch("sys.argv", ["hcl2tojson", "-"]): + with patch("sys.stdin", stdin), patch("sys.stdout", stdout): + main() + + result = json.loads(stdout.getvalue()) + self.assertEqual(result["x"], 1) + + def test_directory_mode(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "a.tf"), SIMPLE_HCL) + _write_file(os.path.join(in_dir, "b.hcl"), SIMPLE_HCL) + _write_file(os.path.join(in_dir, "readme.txt"), "not hcl") + + with patch("sys.argv", ["hcl2tojson", in_dir, out_dir]): + main() + + self.assertTrue(os.path.exists(os.path.join(out_dir, "a.json"))) + self.assertTrue(os.path.exists(os.path.join(out_dir, "b.json"))) + self.assertFalse(os.path.exists(os.path.join(out_dir, "readme.json"))) + + result = json.loads(_read_file(os.path.join(out_dir, "a.json"))) + self.assertEqual(result["x"], 1) + + def test_with_meta_flag(self): + hcl_block = 'resource "a" "b" {\n x = 1\n}\n' + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, hcl_block) + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", "--with-meta", hcl_path]): + with patch("sys.stdout", stdout): + main() + + result = json.loads(stdout.getvalue()) + self.assertIn("resource", result) + + def test_no_comments_flag(self): + hcl_with_comment = '# a comment\nx = 1\n' + with 
tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, hcl_with_comment) + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", "--with-comments", hcl_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + self.assertIn("comment", output) + + def test_wrap_objects_flag(self): + hcl_input = 'x = {\n a = 1\n}\n' + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, hcl_input) + + stdout_default = StringIO() + stdout_wrapped = StringIO() + with patch("sys.argv", ["hcl2tojson", hcl_path]): + with patch("sys.stdout", stdout_default): + main() + with patch("sys.argv", ["hcl2tojson", "--wrap-objects", hcl_path]): + with patch("sys.stdout", stdout_wrapped): + main() + + default = json.loads(stdout_default.getvalue()) + wrapped = json.loads(stdout_wrapped.getvalue()) + self.assertNotEqual(default["x"], wrapped["x"]) + + def test_wrap_tuples_flag(self): + hcl_input = 'x = [1, 2]\n' + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, hcl_input) + + stdout_default = StringIO() + stdout_wrapped = StringIO() + with patch("sys.argv", ["hcl2tojson", hcl_path]): + with patch("sys.stdout", stdout_default): + main() + with patch("sys.argv", ["hcl2tojson", "--wrap-tuples", hcl_path]): + with patch("sys.stdout", stdout_wrapped): + main() + + default = json.loads(stdout_default.getvalue()) + wrapped = json.loads(stdout_wrapped.getvalue()) + self.assertNotEqual(default["x"], wrapped["x"]) + + def test_skip_flag(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "good.tf"), SIMPLE_HCL) + _write_file(os.path.join(in_dir, "bad.tf"), "this is {{{{ not valid hcl") + + with patch("sys.argv", ["hcl2tojson", "-s", in_dir, out_dir]): + 
main() + + self.assertTrue(os.path.exists(os.path.join(out_dir, "good.json"))) + + def test_directory_requires_out_path(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + os.mkdir(in_dir) + _write_file(os.path.join(in_dir, "a.tf"), SIMPLE_HCL) + + with patch("sys.argv", ["hcl2tojson", in_dir]): + with self.assertRaises(RuntimeError): + main() + + def test_invalid_path_raises_error(self): + with patch("sys.argv", ["hcl2tojson", "/nonexistent/path/foo.tf"]): + with self.assertRaises(RuntimeError): + main() + + +class TestSingleFileErrorHandling(TestCase): + + def test_skip_error_with_output_file(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.tf") + out_path = os.path.join(tmpdir, "out.json") + _write_file(in_path, "this is {{{{ not valid hcl") + + with patch("sys.argv", ["hcl2tojson", "-s", in_path, out_path]): + main() + + if os.path.exists(out_path): + self.assertEqual(_read_file(out_path), "") + + def test_raise_error_with_output_file(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.tf") + out_path = os.path.join(tmpdir, "out.json") + _write_file(in_path, "this is {{{{ not valid hcl") + + with patch("sys.argv", ["hcl2tojson", in_path, out_path]): + with self.assertRaises(Exception): + main() + + def test_skip_error_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.tf") + _write_file(in_path, "this is {{{{ not valid hcl") + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", "-s", in_path]): + with patch("sys.stdout", stdout): + main() + + self.assertEqual(stdout.getvalue(), "") + + def test_raise_error_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.tf") + _write_file(in_path, "this is {{{{ not valid hcl") + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", in_path]): + with patch("sys.stdout", 
stdout): + with self.assertRaises(Exception): + main() + + +class TestDirectoryEdgeCases(TestCase): + + def test_subdirectory_creation(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + sub_dir = os.path.join(in_dir, "sub") + out_dir = os.path.join(tmpdir, "output") + os.makedirs(sub_dir) + + _write_file(os.path.join(sub_dir, "nested.tf"), SIMPLE_HCL) + + with patch("sys.argv", ["hcl2tojson", in_dir, out_dir]): + main() + + self.assertTrue( + os.path.exists(os.path.join(out_dir, "sub", "nested.json")) + ) + + def test_directory_raise_error_without_skip(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "bad.tf"), "this is {{{{ not valid hcl") + + with patch("sys.argv", ["hcl2tojson", in_dir, out_dir]): + with self.assertRaises(Exception): + main() diff --git a/test/unit/cli/test_helpers.py b/test/unit/cli/test_helpers.py new file mode 100644 index 00000000..6859d0ab --- /dev/null +++ b/test/unit/cli/test_helpers.py @@ -0,0 +1,174 @@ +import os +import tempfile +from io import StringIO +from unittest import TestCase +from unittest.mock import patch + +from cli.helpers import _convert_single_file, _convert_directory, _convert_stdin + + +def _write_file(path, content): + with open(path, "w", encoding="utf-8") as f: + f.write(content) + + +class TestConvertSingleFile(TestCase): + + def test_does_not_close_stdout(self): + """Regression test: stdout must not be closed after writing.""" + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "test.txt") + _write_file(path, "hello") + + captured = StringIO() + + def convert(in_f, out_f): + out_f.write(in_f.read()) + + with patch("sys.stdout", captured): + _convert_single_file(path, None, convert, False, (Exception,)) + + self.assertFalse(captured.closed) + self.assertIn("hello", captured.getvalue()) + + def 
test_skip_error_with_output_file(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.txt") + out_path = os.path.join(tmpdir, "out.txt") + _write_file(in_path, "data") + + def convert(in_f, out_f): + raise ValueError("boom") + + _convert_single_file(in_path, out_path, convert, True, (ValueError,)) + + def test_raise_error_with_output_file(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.txt") + out_path = os.path.join(tmpdir, "out.txt") + _write_file(in_path, "data") + + def convert(in_f, out_f): + raise ValueError("boom") + + with self.assertRaises(ValueError): + _convert_single_file(in_path, out_path, convert, False, (ValueError,)) + + def test_skip_error_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.txt") + _write_file(in_path, "data") + + def convert(in_f, out_f): + raise ValueError("boom") + + stdout = StringIO() + with patch("sys.stdout", stdout): + _convert_single_file(in_path, None, convert, True, (ValueError,)) + + self.assertEqual(stdout.getvalue(), "") + + def test_raise_error_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.txt") + _write_file(in_path, "data") + + def convert(in_f, out_f): + raise ValueError("boom") + + stdout = StringIO() + with patch("sys.stdout", stdout): + with self.assertRaises(ValueError): + _convert_single_file(in_path, None, convert, False, (ValueError,)) + + +class TestConvertDirectory(TestCase): + + def test_filters_by_extension(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "a.tf"), "content") + _write_file(os.path.join(in_dir, "b.txt"), "content") + + converted_files = [] + + def convert(in_f, out_f): + out_f.write(in_f.read()) + converted_files.append(True) + + _convert_directory( + 
in_dir, out_dir, convert, False, (Exception,), + in_extensions={".tf"}, out_extension=".json", + ) + + self.assertEqual(len(converted_files), 1) + self.assertTrue(os.path.exists(os.path.join(out_dir, "a.json"))) + self.assertFalse(os.path.exists(os.path.join(out_dir, "b.json"))) + + def test_requires_out_path(self): + with tempfile.TemporaryDirectory() as tmpdir: + with self.assertRaises(RuntimeError): + _convert_directory( + tmpdir, None, lambda i, o: None, False, (Exception,), + in_extensions={".tf"}, out_extension=".json", + ) + + def test_subdirectory_creation(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + sub_dir = os.path.join(in_dir, "sub") + out_dir = os.path.join(tmpdir, "output") + os.makedirs(sub_dir) + + _write_file(os.path.join(sub_dir, "nested.tf"), "content") + + def convert(in_f, out_f): + out_f.write(in_f.read()) + + _convert_directory( + in_dir, out_dir, convert, False, (Exception,), + in_extensions={".tf"}, out_extension=".json", + ) + + self.assertTrue( + os.path.exists(os.path.join(out_dir, "sub", "nested.json")) + ) + + def test_raise_error_without_skip(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "bad.tf"), "data") + + def convert(in_f, out_f): + raise ValueError("boom") + + with self.assertRaises(ValueError): + _convert_directory( + in_dir, out_dir, convert, False, (ValueError,), + in_extensions={".tf"}, out_extension=".json", + ) + + +class TestConvertStdin(TestCase): + + def test_stdin_forward(self): + stdout = StringIO() + captured = [] + + def convert(in_f, out_f): + data = in_f.read() + captured.append(data) + out_f.write("output") + + with patch("sys.stdin", StringIO("input")), \ + patch("sys.stdout", stdout): + _convert_stdin(convert) + + self.assertEqual(captured[0], "input") + self.assertIn("output", stdout.getvalue()) diff --git 
a/test/unit/cli/test_json_to_hcl.py b/test/unit/cli/test_json_to_hcl.py new file mode 100644 index 00000000..469d9188 --- /dev/null +++ b/test/unit/cli/test_json_to_hcl.py @@ -0,0 +1,156 @@ +import json +import os +import tempfile +from io import StringIO +from unittest import TestCase +from unittest.mock import patch + +from cli.json_to_hcl import main + + +SIMPLE_JSON_DICT = {"x": 1} +SIMPLE_JSON = json.dumps(SIMPLE_JSON_DICT) + +BLOCK_JSON_DICT = { + "resource": [ + { + "aws_instance": [ + { + "example": [ + {"ami": "abc-123"} + ] + } + ] + } + ] +} +BLOCK_JSON = json.dumps(BLOCK_JSON_DICT) + + +def _write_file(path, content): + with open(path, "w", encoding="utf-8") as f: + f.write(content) + + +def _read_file(path): + with open(path, "r", encoding="utf-8") as f: + return f.read() + + +class TestJsonToHcl(TestCase): + + def test_single_file_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + json_path = os.path.join(tmpdir, "test.json") + _write_file(json_path, SIMPLE_JSON) + + stdout = StringIO() + with patch("sys.argv", ["jsontohcl2", json_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue().strip() + self.assertIn("x", output) + self.assertIn("1", output) + + def test_single_file_to_output(self): + with tempfile.TemporaryDirectory() as tmpdir: + json_path = os.path.join(tmpdir, "test.json") + out_path = os.path.join(tmpdir, "test.tf") + _write_file(json_path, SIMPLE_JSON) + + with patch("sys.argv", ["jsontohcl2", json_path, out_path]): + main() + + output = _read_file(out_path) + self.assertIn("x", output) + self.assertIn("1", output) + + def test_stdin(self): + stdout = StringIO() + stdin = StringIO(SIMPLE_JSON) + with patch("sys.argv", ["jsontohcl2", "-"]): + with patch("sys.stdin", stdin), patch("sys.stdout", stdout): + main() + + output = stdout.getvalue().strip() + self.assertIn("x", output) + self.assertIn("1", output) + + def test_directory_mode(self): + with tempfile.TemporaryDirectory() as tmpdir: + 
in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "a.json"), SIMPLE_JSON) + _write_file(os.path.join(in_dir, "readme.txt"), "not json") + + with patch("sys.argv", ["jsontohcl2", in_dir, out_dir]): + main() + + self.assertTrue(os.path.exists(os.path.join(out_dir, "a.tf"))) + self.assertFalse(os.path.exists(os.path.join(out_dir, "readme.tf"))) + + def test_indent_flag(self): + with tempfile.TemporaryDirectory() as tmpdir: + json_path = os.path.join(tmpdir, "test.json") + _write_file(json_path, BLOCK_JSON) + + stdout = StringIO() + with patch("sys.argv", ["jsontohcl2", "--indent", "4", json_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + self.assertIn(" ami", output) + + def test_no_align_flag(self): + hcl_json = json.dumps({"short": 1, "very_long_name": 2}) + with tempfile.TemporaryDirectory() as tmpdir: + json_path = os.path.join(tmpdir, "test.json") + _write_file(json_path, hcl_json) + + stdout = StringIO() + with patch("sys.argv", ["jsontohcl2", "--no-align", json_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + for line in output.strip().split("\n"): + line = line.strip() + if line.startswith("short"): + self.assertNotIn(" =", line) + + def test_colon_separator_flag(self): + hcl_json = json.dumps({"x": {"a": 1}}) + with tempfile.TemporaryDirectory() as tmpdir: + json_path = os.path.join(tmpdir, "test.json") + _write_file(json_path, hcl_json) + + stdout = StringIO() + with patch("sys.argv", ["jsontohcl2", "--colon-separator", json_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + self.assertIn(":", output) + + def test_skip_flag_on_invalid_json(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "good.json"), SIMPLE_JSON) + 
_write_file(os.path.join(in_dir, "bad.json"), "{not valid json") + + with patch("sys.argv", ["jsontohcl2", "-s", in_dir, out_dir]): + main() + + self.assertTrue(os.path.exists(os.path.join(out_dir, "good.tf"))) + + def test_invalid_path_raises_error(self): + with patch("sys.argv", ["jsontohcl2", "/nonexistent/path/foo.json"]): + with self.assertRaises(RuntimeError): + main() From 776a3f06128c62ae6e2ce3b342f7d4ebbfd2e5c3 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 2 Mar 2026 16:54:43 +0100 Subject: [PATCH 32/45] minor fixes --- hcl2/const.py | 3 +-- hcl2/deserializer.py | 10 +++++++--- test/integration/hcl2_original/smoke.tf | 2 +- test/integration/json_reserialized/floats.json | 2 +- .../json_reserialized/string_interpolations.json | 2 +- test/integration/json_serialized/floats.json | 2 +- .../json_serialized/string_interpolations.json | 2 +- test/integration/test_round_trip.py | 8 ++++---- 8 files changed, 17 insertions(+), 14 deletions(-) diff --git a/hcl2/const.py b/hcl2/const.py index 1bd4a4ce..c36a5321 100644 --- a/hcl2/const.py +++ b/hcl2/const.py @@ -1,5 +1,4 @@ """Module for various constants used across the library""" -START_LINE_KEY = "__start_line__" -END_LINE_KEY = "__end_line__" IS_BLOCK = "__is_block__" +COMMENTS_KEY = "__comments__" diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 167d21f9..5d308fb7 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -8,7 +8,7 @@ from regex import regex from hcl2.parser import parser as _get_parser -from hcl2.const import IS_BLOCK +from hcl2.const import IS_BLOCK, COMMENTS_KEY from hcl2.rules.abstract import LarkElement, LarkRule from hcl2.rules.base import ( BlockRule, @@ -129,7 +129,7 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: else: # otherwise it's just an attribute - if key != IS_BLOCK: + if not self._is_reserved_key(key): children.append(self._deserialize_attribute(key, val)) return children @@ -254,7 +254,7 @@ def _deserialize_block(self, 
first_label: str, value: dict) -> BlockRule: # Keep peeling off single-key layers until we hit the body (dict with IS_BLOCK) while isinstance(body, dict) and not body.get(IS_BLOCK): - non_block_keys = [k for k in body.keys() if k != IS_BLOCK] + non_block_keys = [k for k in body.keys() if not self._is_reserved_key(k)] if len(non_block_keys) == 1: # This is another label level label = non_block_keys[0] @@ -335,6 +335,10 @@ def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: return ObjectElemRule(result) + def _is_reserved_key(self, key: str) -> bool: + """Check if a key is a reserved metadata key that should be skipped during deserialization.""" + return key in (IS_BLOCK, COMMENTS_KEY) + def _is_expression(self, value: Any) -> bool: return isinstance(value, str) and value.startswith("${") and value.endswith("}") diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index 99537532..e2a0843b 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -37,7 +37,7 @@ block label1 label2 { k = a.b.5 l = a.*.b m = a[*][c].a.*.1 - + block b1 { a = 1 } diff --git a/test/integration/json_reserialized/floats.json b/test/integration/json_reserialized/floats.json index 8246516c..db301445 100644 --- a/test/integration/json_reserialized/floats.json +++ b/test/integration/json_reserialized/floats.json @@ -28,4 +28,4 @@ "__is_block__": true } ] -} \ No newline at end of file +} diff --git a/test/integration/json_reserialized/string_interpolations.json b/test/integration/json_reserialized/string_interpolations.json index 059fcfbf..f9df252c 100644 --- a/test/integration/json_reserialized/string_interpolations.json +++ b/test/integration/json_reserialized/string_interpolations.json @@ -15,4 +15,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/integration/json_serialized/floats.json b/test/integration/json_serialized/floats.json index 8246516c..db301445 100644 --- 
a/test/integration/json_serialized/floats.json +++ b/test/integration/json_serialized/floats.json @@ -28,4 +28,4 @@ "__is_block__": true } ] -} \ No newline at end of file +} diff --git a/test/integration/json_serialized/string_interpolations.json b/test/integration/json_serialized/string_interpolations.json index 059fcfbf..f9df252c 100644 --- a/test/integration/json_serialized/string_interpolations.json +++ b/test/integration/json_serialized/string_interpolations.json @@ -15,4 +15,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/integration/test_round_trip.py b/test/integration/test_round_trip.py index 3d2bbbb0..a963c4f8 100644 --- a/test/integration/test_round_trip.py +++ b/test/integration/test_round_trip.py @@ -115,7 +115,7 @@ def test_hcl_to_json(self): self.assertEqual( actual, expected, - f"HCL → JSON serialization mismatch for {suite}", + f"HCL → JSON serialization mismatch for suite {suite}", ) @@ -137,7 +137,7 @@ def test_json_reserialization(self): self.assertEqual( actual, expected, - f"JSON reserialization mismatch for {suite}", + f"JSON reserialization mismatch for suite {suite}", ) @@ -159,7 +159,7 @@ def test_json_to_hcl(self): self.assertMultiLineEqual( actual, expected, - f"HCL reconstruction mismatch for {suite}", + f"HCL reconstruction mismatch for suite {suite}", ) @@ -186,6 +186,6 @@ def test_full_round_trip(self): self.assertEqual( reserialized, serialized, - f"Full round-trip mismatch for {suite}: " + f"Full round-trip mismatch for suite {suite}: " f"HCL → JSON → HCL → JSON did not produce identical JSON", ) From 745b1c70183578b84a14c9d5420bc7b54318fbd8 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 2 Mar 2026 17:44:27 +0100 Subject: [PATCH 33/45] fixes to for/tuple expressions formatting; unit tests for deserializer and formatter --- hcl2/deserializer.py | 1 + hcl2/formatter.py | 32 +- test/integration/hcl2_reconstructed/smoke.tf | 4 +- test/unit/test_deserializer.py | 571 ++++++++++++++ test/unit/test_formatter.py | 
783 +++++++++++++++++++ 5 files changed, 1385 insertions(+), 6 deletions(-) create mode 100644 test/unit/test_deserializer.py create mode 100644 test/unit/test_formatter.py diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 5d308fb7..328427db 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -66,6 +66,7 @@ class DeserializerOptions: strings_to_heredocs: bool = False object_elements_colon: bool = False object_elements_trailing_comma: bool = True + # with_comments: bool = False # TODO class LarkElementTreeDeserializer(ABC): diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 35fb6b05..23302187 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -14,6 +14,8 @@ from hcl2.rules.for_expressions import ( ForTupleExprRule, ForObjectExprRule, + ForIntroRule, + ForCondRule, ) from hcl2.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA from hcl2.rules.whitespace import NewLineOrCommentRule @@ -161,10 +163,20 @@ def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = for child in expression.children: if isinstance(child, ExprTermRule): self.format_expression(child, indent_level + 1) + elif isinstance(child, (ForIntroRule, ForCondRule)): + for sub_child in child.children: + if isinstance(sub_child, ExprTermRule): + self.format_expression(sub_child, indent_level + 1) - indexes = [1, 3, 5, 7] - for index in indexes: + for index in [1, 3]: expression.children[index] = self._build_newline(indent_level) + + if expression.condition is not None: + expression.children[5] = self._build_newline(indent_level) + else: + expression.children[5] = None + + expression.children[7] = self._build_newline(indent_level) self._deindent_last_line() def format_forobjectexpr( @@ -173,11 +185,23 @@ def format_forobjectexpr( for child in expression.children: if isinstance(child, ExprTermRule): self.format_expression(child, indent_level + 1) + elif isinstance(child, (ForIntroRule, ForCondRule)): + for sub_child in child.children: + 
if isinstance(sub_child, ExprTermRule): + self.format_expression(sub_child, indent_level + 1) - indexes = [1, 3, 12] - for index in indexes: + for index in [1, 3]: expression.children[index] = self._build_newline(indent_level) + expression.children[6] = None + expression.children[8] = None + + if expression.condition is not None: + expression.children[10] = self._build_newline(indent_level) + else: + expression.children[10] = None + + expression.children[12] = self._build_newline(indent_level) self._deindent_last_line() def _vertically_align_attributes_in_body(self, body: BodyRule): diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 40e2597d..970cc1cf 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -46,8 +46,8 @@ block { aws_account_ids = [ for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] - ] - } ... if substr(bucket_name, 0, 1) == "l" + } ... 
+ if substr(bucket_name, 0, 1) == "l" } } diff --git a/test/unit/test_deserializer.py b/test/unit/test_deserializer.py new file mode 100644 index 00000000..908b1302 --- /dev/null +++ b/test/unit/test_deserializer.py @@ -0,0 +1,571 @@ +from unittest import TestCase + +from hcl2.const import IS_BLOCK, COMMENTS_KEY +from hcl2.deserializer import BaseDeserializer, DeserializerOptions +from hcl2.rules.base import StartRule, BodyRule, BlockRule, AttributeRule +from hcl2.rules.containers import ( + TupleRule, + ObjectRule, + ObjectElemRule, + ObjectElemKeyDotAccessor, + ObjectElemKeyExpressionRule, + ObjectElemKeyRule, +) +from hcl2.rules.expressions import ExprTermRule +from hcl2.rules.literal_rules import IdentifierRule, IntLitRule, FloatLitRule +from hcl2.rules.strings import ( + StringRule, + StringPartRule, + InterpolationRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, +) +from hcl2.rules.tokens import ( + STRING_CHARS, + ESCAPED_INTERPOLATION, + COMMA, + LSQB, + RSQB, + LBRACE, + RBRACE, + EQ, + COLON, +) + + +# --- helpers --- + + +def _deser(options=None): + return BaseDeserializer(options) + + +# --- DeserializerOptions tests --- + + +class TestDeserializerOptions(TestCase): + def test_defaults(self): + opts = DeserializerOptions() + self.assertFalse(opts.heredocs_to_strings) + self.assertFalse(opts.strings_to_heredocs) + self.assertFalse(opts.object_elements_colon) + self.assertTrue(opts.object_elements_trailing_comma) + + +# --- load_python top-level dispatch --- + + +class TestBaseDeserializerLoadPython(TestCase): + def test_dict_input_produces_start_with_body(self): + d = _deser() + result = d.load_python({"x": 1}) + self.assertIsInstance(result, StartRule) + self.assertIsInstance(result.body, BodyRule) + + def test_dict_body_contains_attribute(self): + d = _deser() + result = d.load_python({"x": 1}) + body = result.body + self.assertEqual(len(body.children), 1) + self.assertIsInstance(body.children[0], AttributeRule) + + def 
test_list_input_produces_start_wrapping_tuple(self): + d = _deser() + result = d.load_python([1, 2]) + self.assertIsInstance(result, StartRule) + # The child should be a TupleRule (via _deserialize) + child = result.children[0] + self.assertIsInstance(child, TupleRule) + + def test_scalar_string_input(self): + d = _deser() + result = d.load_python("hello") + self.assertIsInstance(result, StartRule) + child = result.children[0] + self.assertIsInstance(child, IdentifierRule) + self.assertEqual(child.token.value, "hello") + + def test_loads_parses_json(self): + d = _deser() + result = d.loads('{"key": 42}') + self.assertIsInstance(result, StartRule) + body = result.body + self.assertEqual(len(body.children), 1) + self.assertIsInstance(body.children[0], AttributeRule) + + +# --- _deserialize_text branches --- + + +class TestDeserializeText(TestCase): + def test_bool_true(self): + d = _deser() + result = d._deserialize_text(True) + self.assertIsInstance(result, IdentifierRule) + self.assertEqual(result.token.value, "true") + + def test_bool_false(self): + d = _deser() + result = d._deserialize_text(False) + self.assertIsInstance(result, IdentifierRule) + self.assertEqual(result.token.value, "false") + + def test_bool_before_int(self): + """bool is subclass of int; ensure True doesn't produce IntLitRule.""" + d = _deser() + result = d._deserialize_text(True) + self.assertNotIsInstance(result, IntLitRule) + self.assertIsInstance(result, IdentifierRule) + + def test_int_value(self): + d = _deser() + result = d._deserialize_text(42) + self.assertIsInstance(result, IntLitRule) + self.assertEqual(result.token.value, 42) + + def test_float_value(self): + d = _deser() + result = d._deserialize_text(3.14) + self.assertIsInstance(result, FloatLitRule) + self.assertEqual(result.token.value, 3.14) + + def test_quoted_string(self): + d = _deser() + result = d._deserialize_text('"hello"') + self.assertIsInstance(result, StringRule) + + def test_unquoted_string_identifier(self): + d = 
_deser() + result = d._deserialize_text("my_var") + self.assertIsInstance(result, IdentifierRule) + self.assertEqual(result.token.value, "my_var") + + def test_expression_string(self): + d = _deser() + result = d._deserialize_text("${var.x}") + self.assertIsInstance(result, ExprTermRule) + + def test_non_string_non_numeric_fallback(self): + """Non-string, non-numeric values get str()-converted to identifier.""" + d = _deser() + result = d._deserialize_text(None) + self.assertIsInstance(result, IdentifierRule) + self.assertEqual(result.token.value, "None") + + def test_zero_int(self): + d = _deser() + result = d._deserialize_text(0) + self.assertIsInstance(result, IntLitRule) + self.assertEqual(result.token.value, 0) + + def test_negative_float(self): + d = _deser() + result = d._deserialize_text(-1.5) + self.assertIsInstance(result, FloatLitRule) + self.assertEqual(result.token.value, -1.5) + + +# --- heredoc handling --- + + +class TestDeserializeHeredocs(TestCase): + def test_preserved_heredoc(self): + d = _deser() + result = d._deserialize_text('"< value_name}""" + children = [ + LBRACE(), + _make_for_intro(), + _make_expr_term(_make_identifier(key_name)), + FOR_OBJECT_ARROW(), + _make_expr_term(_make_identifier(value_name)), + ] + if ellipsis: + children.append(ELLIPSIS()) + if condition is not None: + children.append(condition) + children.append(RBRACE()) + return ForObjectExprRule(children) + + +# --- format_fortupleexpr --- + + +class TestFormatForTupleExpr(TestCase): + def test_basic_no_condition_no_spurious_newline(self): + """No condition → index 5 should be None, no spurious blank line.""" + f = _fmt() + expr = _make_for_tuple_expr() + f.format_fortupleexpr(expr, indent_level=1) + + self.assertIsNone(expr.children[5]) + for idx in [1, 3, 7]: + self.assertIsInstance(expr.children[idx], NewLineOrCommentRule) + + def test_basic_no_condition_deindents_closing(self): + """Last newline (before ]) should be deindented.""" + f = _fmt() + expr = 
_make_for_tuple_expr() + f.format_fortupleexpr(expr, indent_level=1) + + last_nl = expr.children[7] + self.assertEqual(_nlc_value(last_nl), "\n") + + def test_with_condition_newline_before_if(self): + """With condition → index 5 should be a newline before `if`.""" + f = _fmt() + cond = _make_for_cond() + expr = _make_for_tuple_expr(condition=cond) + f.format_fortupleexpr(expr, indent_level=1) + + self.assertIsInstance(expr.children[5], NewLineOrCommentRule) + for idx in [1, 3, 7]: + self.assertIsInstance(expr.children[idx], NewLineOrCommentRule) + + def test_with_condition_deindents_closing(self): + """Even with condition, last newline (before ]) is deindented.""" + f = _fmt() + cond = _make_for_cond() + expr = _make_for_tuple_expr(condition=cond) + f.format_fortupleexpr(expr, indent_level=1) + + last_nl = expr.children[7] + self.assertEqual(_nlc_value(last_nl), "\n") + + def test_nested_value_object_formatting(self): + """Value expression containing an object should be formatted recursively.""" + f = _fmt() + obj = _make_object([_make_object_elem("k", "v")]) + children = [ + LSQB(), + _make_for_intro(), + _make_expr_term(obj), + RSQB(), + ] + expr = ForTupleExprRule(children) + + f.format_fortupleexpr(expr, indent_level=1) + + nlc_count = sum(1 for c in obj._children if isinstance(c, NewLineOrCommentRule)) + self.assertGreater(nlc_count, 0) + + def test_for_intro_iterable_formatting(self): + """ForIntroRule's iterable expression should be formatted recursively.""" + f = _fmt() + obj = _make_object([_make_object_elem("k", "v")]) + intro = ForIntroRule([ + FOR(), + _make_identifier("item"), + IN(), + _make_expr_term(obj), + COLON(), + ]) + children = [LSQB(), intro, _make_expr_term(_make_identifier("val")), RSQB()] + expr = ForTupleExprRule(children) + + f.format_fortupleexpr(expr, indent_level=1) + + nlc_count = sum(1 for c in obj._children if isinstance(c, NewLineOrCommentRule)) + self.assertGreater(nlc_count, 0) + + +# --- format_forobjectexpr --- + + +class 
TestFormatForObjectExpr(TestCase): + def test_basic_no_condition_no_ellipsis(self): + """No condition, no ellipsis → indices 6, 8, 10 should be None.""" + f = _fmt() + expr = _make_for_object_expr() + f.format_forobjectexpr(expr, indent_level=1) + + self.assertIsNone(expr.children[6]) + self.assertIsNone(expr.children[8]) + self.assertIsNone(expr.children[10]) + for idx in [1, 3, 12]: + self.assertIsInstance(expr.children[idx], NewLineOrCommentRule) + + def test_basic_deindents_closing(self): + """Last newline (before }) should be deindented.""" + f = _fmt() + expr = _make_for_object_expr() + f.format_forobjectexpr(expr, indent_level=1) + + last_nl = expr.children[12] + self.assertEqual(_nlc_value(last_nl), "\n") + + def test_with_condition_newline_before_if(self): + """With condition → index 10 should be a newline before `if`.""" + f = _fmt() + cond = _make_for_cond() + expr = _make_for_object_expr(condition=cond) + f.format_forobjectexpr(expr, indent_level=1) + + self.assertIsInstance(expr.children[10], NewLineOrCommentRule) + self.assertIsNone(expr.children[6]) + self.assertIsNone(expr.children[8]) + + def test_with_condition_deindents_closing(self): + """Even with condition, last newline (before }) is deindented.""" + f = _fmt() + cond = _make_for_cond() + expr = _make_for_object_expr(condition=cond) + f.format_forobjectexpr(expr, indent_level=1) + + last_nl = expr.children[12] + self.assertEqual(_nlc_value(last_nl), "\n") + + def test_with_ellipsis_and_condition(self): + """With ellipsis and condition → index 10 is newline, 6/8 cleared.""" + f = _fmt() + cond = _make_for_cond() + expr = _make_for_object_expr(ellipsis=True, condition=cond) + f.format_forobjectexpr(expr, indent_level=1) + + self.assertIsInstance(expr.children[9], ELLIPSIS) + self.assertIsInstance(expr.children[10], NewLineOrCommentRule) + self.assertIsNone(expr.children[6]) + self.assertIsNone(expr.children[8]) + + def test_nested_value_tuple_formatting(self): + """Value expression containing a 
tuple should be formatted recursively.""" + f = _fmt() + inner_tup = _make_tuple([_make_expr_term(_make_identifier("a"))]) + children = [ + LBRACE(), + _make_for_intro(), + _make_expr_term(_make_identifier("k")), + FOR_OBJECT_ARROW(), + _make_expr_term(inner_tup), + RBRACE(), + ] + expr = ForObjectExprRule(children) + + f.format_forobjectexpr(expr, indent_level=1) + + nlc_count = sum(1 for c in inner_tup._children if isinstance(c, NewLineOrCommentRule)) + self.assertGreater(nlc_count, 0) + + def test_for_cond_expression_formatting(self): + """ForCondRule's condition expression should be formatted recursively.""" + f = _fmt() + obj = _make_object([_make_object_elem("k", "v")]) + cond = ForCondRule([IF(), _make_expr_term(obj)]) + expr = _make_for_object_expr(condition=cond) + + f.format_forobjectexpr(expr, indent_level=1) + + nlc_count = sum(1 for c in obj._children if isinstance(c, NewLineOrCommentRule)) + self.assertGreater(nlc_count, 0) From 648696e6c58405a61f5c9c41da83c8b01842ffb3 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 19:46:41 +0100 Subject: [PATCH 34/45] hcl2/rules - fix pre-commit errors --- hcl2/rules/abstract.py | 32 +++++++++++- hcl2/rules/base.py | 30 +++++++++-- hcl2/rules/containers.py | 60 ++++++++++++++++------ hcl2/rules/expressions.py | 59 ++++++++++++++++----- hcl2/rules/for_expressions.py | 71 ++++++++++++++++--------- hcl2/rules/functions.py | 37 +++++++++---- hcl2/rules/indexing.py | 82 +++++++++++++++++++++++------ hcl2/rules/literal_rules.py | 29 ++++++++++- hcl2/rules/strings.py | 60 +++++++++++++++------- hcl2/rules/tokens.py | 97 ++++++++++++++++++++++------------- hcl2/rules/whitespace.py | 23 +++++++-- 11 files changed, 433 insertions(+), 147 deletions(-) diff --git a/hcl2/rules/abstract.py b/hcl2/rules/abstract.py index 316c777a..26cda89c 100644 --- a/hcl2/rules/abstract.py +++ b/hcl2/rules/abstract.py @@ -1,3 +1,5 @@ +"""Abstract base classes for the LarkElement tree intermediate representation.""" + from abc 
import ABC, abstractmethod from typing import Any, Union, List, Optional, Callable @@ -8,33 +10,42 @@ class LarkElement(ABC): + """Base class for all elements in the LarkElement tree.""" + @staticmethod @abstractmethod def lark_name() -> str: + """Return the corresponding Lark grammar rule or token name.""" raise NotImplementedError() - def __init__(self, index: int = -1, parent: "LarkElement" = None): + def __init__(self, index: int = -1, parent: Optional["LarkElement"] = None): self._index = index self._parent = parent def set_index(self, i: int): + """Set the position index of this element within its parent.""" self._index = i def set_parent(self, node: "LarkElement"): + """Set the parent element that contains this element.""" self._parent = node @abstractmethod def to_lark(self) -> Any: + """Convert this element back to a Lark Tree or Token.""" raise NotImplementedError() @abstractmethod def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize this element to a Python object (dict, list, str, etc.).""" raise NotImplementedError() class LarkToken(LarkElement, ABC): + """Base class for terminal token elements (leaves of the tree).""" + def __init__(self, value: Union[str, int, float]): self._value = value super().__init__() @@ -42,21 +53,26 @@ def __init__(self, value: Union[str, int, float]): @property @abstractmethod def serialize_conversion(self) -> Callable: + """Return the callable used to convert this token's value during serialization.""" raise NotImplementedError() @property def value(self): + """Return the raw value of this token.""" return self._value def set_value(self, value: Any): + """Set the raw value of this token.""" self._value = value def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize this token using its serialize_conversion callable.""" return self.serialize_conversion(self.value) def to_lark(self) -> Token: + """Convert this token 
back to a Lark Token.""" return Token(self.lark_name(), self.value) def __str__(self) -> str: @@ -67,25 +83,39 @@ def __repr__(self) -> str: class LarkRule(LarkElement, ABC): + """Base class for non-terminal rule elements (internal nodes of the tree). + + Subclasses should declare `_children_layout: Tuple[...]` (without assignment) + to document the expected positional structure of `_children`. For variable-length + rules, use `_children_layout: List[Union[...]]`. This annotation exists only in + `__annotations__` and does not create an attribute or conflict with the runtime + `_children` list. + """ + @abstractmethod def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize this rule and its children to a Python object.""" raise NotImplementedError() @property def children(self) -> List[LarkElement]: + """Return the list of child elements.""" return self._children @property def parent(self): + """Return the parent element.""" return self._parent @property def index(self): + """Return the position index within the parent.""" return self._index def to_lark(self) -> Tree: + """Convert this rule and its children back to a Lark Tree.""" result_children = [] for child in self._children: if child is None: diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index a025949a..26a31247 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -1,3 +1,5 @@ +"""Rule classes for HCL2 structural elements (attributes, bodies, blocks).""" + from collections import defaultdict from typing import Tuple, Any, List, Union, Optional @@ -5,7 +7,7 @@ from hcl2.const import IS_BLOCK from hcl2.rules.abstract import LarkRule, LarkToken -from hcl2.rules.expressions import ExpressionRule, ExprTermRule +from hcl2.rules.expressions import ExprTermRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE @@ -15,7 +17,9 @@ class 
AttributeRule(LarkRule): - _children: Tuple[ + """Rule for key = value attribute assignments.""" + + _children_layout: Tuple[ IdentifierRule, EQ, ExprTermRule, @@ -23,25 +27,30 @@ class AttributeRule(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "attribute" @property def identifier(self) -> IdentifierRule: + """Return the attribute name identifier.""" return self._children[0] @property def expression(self) -> ExprTermRule: + """Return the attribute value expression.""" return self._children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a single-entry dict.""" return {self.identifier.serialize(options): self.expression.serialize(options)} class BodyRule(LarkRule): + """Rule for a body containing attributes, blocks, and comments.""" - _children: List[ + _children_layout: List[ Union[ NewLineOrCommentRule, AttributeRule, @@ -51,11 +60,13 @@ class BodyRule(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "body" def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a dict, grouping blocks under their type name.""" attribute_names = set() comments = [] inline_comments = [] @@ -92,26 +103,31 @@ def serialize( class StartRule(LarkRule): + """Rule for the top-level start rule of an HCL2 document.""" - _children: Tuple[BodyRule] + _children_layout: Tuple[BodyRule] @property def body(self) -> BodyRule: + """Return the document body.""" return self._children[0] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "start" def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize by delegating to the body.""" return self.body.serialize(options) class BlockRule(LarkRule): + """Rule for HCL2 blocks (e.g. resource 'type' 'name' { ... 
}).""" - _children: Tuple[ + _children_layout: Tuple[ IdentifierRule, Optional[Union[IdentifierRule, StringRule]], LBRACE, @@ -128,19 +144,23 @@ def __init__(self, children, meta: Optional[Meta] = None): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "block" @property def labels(self) -> List[NAME]: + """Return the block label chain (type name, optional string labels).""" return list(filter(lambda label: label is not None, self._labels)) @property def body(self) -> BodyRule: + """Return the block body.""" return self._body def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a nested dict with labels as keys.""" result = self._body.serialize(options) if options.explicit_blocks: result.update({IS_BLOCK: True}) diff --git a/hcl2/rules/containers.py b/hcl2/rules/containers.py index 3f590c5c..78e0bdeb 100644 --- a/hcl2/rules/containers.py +++ b/hcl2/rules/containers.py @@ -1,3 +1,5 @@ +"""Rule classes for HCL2 tuples, objects, and their elements.""" + from typing import Tuple, List, Optional, Union, Any from hcl2.rules.abstract import LarkRule @@ -32,8 +34,9 @@ class TupleRule(InlineCommentMixIn): + """Rule for tuple/array literals ([elem, ...]).""" - _children: Tuple[ + _children_layout: Tuple[ LSQB, Optional[NewLineOrCommentRule], Tuple[ @@ -52,10 +55,12 @@ class TupleRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "tuple" @property def elements(self) -> List[ExpressionRule]: + """Return the expression elements of the tuple.""" return [ child for child in self.children[1:-1] if isinstance(child, ExpressionRule) ] @@ -63,6 +68,7 @@ def elements(self) -> List[ExpressionRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a Python list or bracketed string.""" if not options.wrap_tuples and not context.inside_dollar_string: return 
[element.serialize(options, context) for element in self.elements] @@ -80,22 +86,26 @@ def serialize( class ObjectElemKeyRule(LarkRule): + """Rule for an object element key.""" key_T = Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule] - _children: Tuple[key_T] + _children_layout: Tuple[key_T] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "object_elem_key" @property def value(self) -> key_T: + """Return the key value (identifier, string, or number).""" return self._children[0] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize the key, coercing numbers to strings.""" result = self.value.serialize(options, context) # Object keys must be strings for JSON compatibility if isinstance(result, (int, float)): @@ -104,8 +114,9 @@ def serialize( class ObjectElemKeyExpressionRule(LarkRule): + """Rule for parenthesized expression keys in objects.""" - _children: Tuple[ + _children_layout: Tuple[ LPAR, ExpressionRule, RPAR, @@ -113,15 +124,18 @@ class ObjectElemKeyExpressionRule(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "object_elem_key_expression" @property def expression(self) -> ExpressionRule: + """Return the parenthesized key expression.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '(expression)' string.""" with context.modify(inside_dollar_string=True): result = f"({self.expression.serialize(options, context)})" if not context.inside_dollar_string: @@ -130,8 +144,9 @@ def serialize( class ObjectElemKeyDotAccessor(LarkRule): + """Rule for dot-accessor keys in objects (e.g. 
a.b.c).""" - _children: Tuple[ + _children_layout: Tuple[ IdentifierRule, Tuple[ IdentifierRule, @@ -141,23 +156,27 @@ class ObjectElemKeyDotAccessor(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "object_elem_key_dot_accessor" @property def identifiers(self) -> List[IdentifierRule]: + """Return the chain of identifiers.""" return [child for child in self._children if isinstance(child, IdentifierRule)] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to 'a.b.c' string.""" return ".".join( identifier.serialize(options, context) for identifier in self.identifiers ) class ObjectElemRule(LarkRule): + """Rule for a single key = value element in an object.""" - _children: Tuple[ + _children_layout: Tuple[ ObjectElemKeyRule, Union[EQ, COLON], ExpressionRule, @@ -165,19 +184,23 @@ class ObjectElemRule(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "object_elem" @property def key(self) -> ObjectElemKeyRule: + """Return the key rule.""" return self._children[0] @property def expression(self): + """Return the value expression.""" return self._children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a single-entry dict.""" return { self.key.serialize(options, context): self.expression.serialize( options, context @@ -186,8 +209,9 @@ def serialize( class ObjectRule(InlineCommentMixIn): + """Rule for object literals ({key = value, ...}).""" - _children: Tuple[ + _children_layout: Tuple[ LBRACE, Optional[NewLineOrCommentRule], Tuple[ @@ -201,10 +225,12 @@ class ObjectRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "object" @property def elements(self) -> List[ObjectElemRule]: + """Return the list of object element rules.""" return [ child for child in self.children[1:-1] if isinstance(child, 
ObjectElemRule) ] @@ -212,21 +238,23 @@ def elements(self) -> List[ObjectElemRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a Python dict or braced string.""" if not options.wrap_objects and not context.inside_dollar_string: - result = {} + dict_result: dict = {} for element in self.elements: - result.update(element.serialize(options, context)) - - return result + dict_result.update(element.serialize(options, context)) + return dict_result with context.modify(inside_dollar_string=True): - result = "{" - result += ", ".join( - f"{element.key.serialize(options, context)} = {element.expression.serialize(options,context)}" + str_result = "{" + str_result += ", ".join( + f"{element.key.serialize(options, context)}" + f" = " + f"{element.expression.serialize(options, context)}" for element in self.elements ) - result += "}" + str_result += "}" if not context.inside_dollar_string: - result = to_dollar_string(result) - return result + str_result = to_dollar_string(str_result) + return str_result diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py index 1e1d0cd8..e6aa1670 100644 --- a/hcl2/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -1,6 +1,7 @@ +"""Rule classes for HCL2 expressions, conditionals, and binary/unary operations.""" + from abc import ABC -from copy import deepcopy -from typing import Any, Tuple, Optional +from typing import Any, Optional, Tuple from lark.tree import Meta @@ -22,8 +23,11 @@ class ExpressionRule(InlineCommentMixIn, ABC): + """Base class for all HCL2 expression rules.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "expression" def __init__( @@ -33,8 +37,12 @@ def __init__( self._parentheses = parentheses def _wrap_into_parentheses( - self, value: str, options=SerializationOptions(), context=SerializationContext() + self, + value: str, + _options=SerializationOptions(), + context=SerializationContext(), ) -> 
str: + """Wrap value in parentheses if inside a nested expression.""" # do not wrap into parentheses if # 1. already wrapped or # 2. is top-level expression (unless explicitly wrapped) @@ -53,8 +61,9 @@ def _wrap_into_parentheses( class ExprTermRule(ExpressionRule): + """Rule for expression terms, optionally wrapped in parentheses.""" - type_ = Tuple[ + _children_layout: Tuple[ Optional[LPAR], Optional[NewLineOrCommentRule], ExpressionRule, @@ -62,10 +71,9 @@ class ExprTermRule(ExpressionRule): Optional[RPAR], ] - _children: type_ - @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -84,15 +92,18 @@ def __init__(self, children, meta: Optional[Meta] = None): @property def parentheses(self) -> bool: + """Return whether this term is wrapped in parentheses.""" return self._parentheses @property def expression(self) -> ExpressionRule: + """Return the inner expression.""" return self._children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize, handling parenthesized expression wrapping.""" with context.modify( inside_parentheses=self.parentheses or context.inside_parentheses ): @@ -107,8 +118,9 @@ def serialize( class ConditionalRule(ExpressionRule): + """Rule for ternary conditional expressions (condition ? 
true : false).""" - _children: Tuple[ + _children_layout: Tuple[ ExpressionRule, QMARK, Optional[NewLineOrCommentRule], @@ -121,6 +133,7 @@ class ConditionalRule(ExpressionRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "conditional" def __init__(self, children, meta: Optional[Meta] = None): @@ -129,19 +142,23 @@ def __init__(self, children, meta: Optional[Meta] = None): @property def condition(self) -> ExpressionRule: + """Return the condition expression.""" return self._children[0] @property def if_true(self) -> ExpressionRule: + """Return the true-branch expression.""" return self._children[3] @property def if_false(self) -> ExpressionRule: + """Return the false-branch expression.""" return self._children[7] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to ternary expression string.""" with context.modify(inside_dollar_string=True): result = ( f"{self.condition.serialize(options, context)} " @@ -159,8 +176,9 @@ def serialize( class BinaryTermRule(ExpressionRule): + """Rule for the operator+operand portion of a binary operation.""" - _children: Tuple[ + _children_layout: Tuple[ BinaryOperatorRule, Optional[NewLineOrCommentRule], ExprTermRule, @@ -168,6 +186,7 @@ class BinaryTermRule(ExpressionRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -176,20 +195,27 @@ def __init__(self, children, meta: Optional[Meta] = None): @property def binary_operator(self) -> BinaryOperatorRule: + """Return the binary operator.""" return self._children[0] @property def expr_term(self) -> ExprTermRule: + """Return the right-hand operand.""" return self._children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return f"{self.binary_operator.serialize(options, context)} {self.expr_term.serialize(options, context)}" + 
"""Serialize to 'operator operand' string.""" + op_str = self.binary_operator.serialize(options, context) + term_str = self.expr_term.serialize(options, context) + return f"{op_str} {term_str}" class BinaryOpRule(ExpressionRule): - _children: Tuple[ + """Rule for complete binary operations (lhs operator rhs).""" + + _children_layout: Tuple[ ExprTermRule, BinaryTermRule, Optional[NewLineOrCommentRule], @@ -197,20 +223,23 @@ class BinaryOpRule(ExpressionRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "binary_op" @property def expr_term(self) -> ExprTermRule: + """Return the left-hand operand.""" return self._children[0] @property def binary_term(self) -> BinaryTermRule: + """Return the binary term (operator + right-hand operand).""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - + """Serialize to 'lhs operator rhs' string.""" with context.modify(inside_dollar_string=True): lhs = self.expr_term.serialize(options, context) operator = self.binary_term.binary_operator.serialize(options, context) @@ -227,25 +256,29 @@ def serialize( class UnaryOpRule(ExpressionRule): + """Rule for unary operations (e.g. 
negation, logical not).""" - _children: Tuple[LarkToken, ExprTermRule] + _children_layout: Tuple[LarkToken, ExprTermRule] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "unary_op" @property def operator(self) -> str: + """Return the unary operator string.""" return str(self._children[0]) @property def expr_term(self): + """Return the operand.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - + """Serialize to 'operator operand' string.""" with context.modify(inside_dollar_string=True): result = f"{self.operator}{self.expr_term.serialize(options, context)}" diff --git a/hcl2/rules/for_expressions.py b/hcl2/rules/for_expressions.py index a1f24dcb..a062e66a 100644 --- a/hcl2/rules/for_expressions.py +++ b/hcl2/rules/for_expressions.py @@ -1,8 +1,9 @@ +"""Rule classes for HCL2 for-tuple and for-object expressions.""" + from typing import Any, Tuple, Optional, List from lark.tree import Meta -from hcl2.rules.abstract import LarkRule, LarkElement from hcl2.rules.expressions import ExpressionRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.tokens import ( @@ -17,6 +18,7 @@ COLON, ELLIPSIS, FOR_OBJECT_ARROW, + StaticStringToken, ) from hcl2.rules.whitespace import ( NewLineOrCommentRule, @@ -32,7 +34,7 @@ class ForIntroRule(InlineCommentMixIn): """Rule for the intro part of for expressions: 'for key, value in collection :'""" - _children: Tuple[ + _children_layout: Tuple[ FOR, Optional[NewLineOrCommentRule], IdentifierRule, @@ -49,6 +51,7 @@ class ForIntroRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "for_intro" def __init__(self, children, meta: Optional[Meta] = None): @@ -56,7 +59,10 @@ def __init__(self, children, meta: Optional[Meta] = None): self._insert_optionals(children) super().__init__(children, meta) - def _insert_optionals(self, children: List, indexes: List[int] = 
None): + def _insert_optionals( # type: ignore[override] + self, children: List, indexes: Optional[List[int]] = None + ): + """Insert None placeholders, handling optional comma and second identifier.""" identifiers = [child for child in children if isinstance(child, IdentifierRule)] second_identifier = identifiers[1] if len(identifiers) == 2 else None @@ -67,27 +73,28 @@ def _insert_optionals(self, children: List, indexes: List[int] = None): super()._insert_optionals(children, sorted(indexes)) if second_identifier is not None: - children[3] = COMMA() + children[3] = COMMA() # type: ignore[abstract] # pylint: disable=abstract-class-instantiated children[4] = second_identifier @property def first_iterator(self) -> IdentifierRule: - """Returns the first iterator""" + """Return the first iterator identifier.""" return self._children[2] @property def second_iterator(self) -> Optional[IdentifierRule]: - """Returns the second iterator or None if not present""" + """Return the second iterator identifier, or None if not present.""" return self._children[4] @property def iterable(self) -> ExpressionRule: - """Returns the collection expression being iterated over""" + """Return the collection expression being iterated over.""" return self._children[8] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> str: + """Serialize to 'for key, value in collection : ' string.""" result = "for " result += f"{self.first_iterator.serialize(options, context)}" @@ -101,7 +108,7 @@ def serialize( class ForCondRule(InlineCommentMixIn): """Rule for the optional condition in for expressions: 'if condition'""" - _children: Tuple[ + _children_layout: Tuple[ IF, Optional[NewLineOrCommentRule], ExpressionRule, # condition expression @@ -109,6 +116,7 @@ class ForCondRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "for_cond" def __init__(self, children, meta: Optional[Meta] = None): @@ -117,19 +125,20 @@ 
def __init__(self, children, meta: Optional[Meta] = None): @property def condition_expr(self) -> ExpressionRule: - """Returns the condition expression""" + """Return the condition expression.""" return self._children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> str: + """Serialize to 'if condition' string.""" return f"if {self.condition_expr.serialize(options, context)}" class ForTupleExprRule(ExpressionRule): """Rule for tuple/array for expressions: [for item in items : expression]""" - _children: Tuple[ + _children_layout: Tuple[ LSQB, Optional[NewLineOrCommentRule], ForIntroRule, @@ -143,13 +152,17 @@ class ForTupleExprRule(ExpressionRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "for_tuple_expr" def __init__(self, children, meta: Optional[Meta] = None): self._insert_optionals(children) super().__init__(children, meta) - def _insert_optionals(self, children: List, indexes: List[int] = None): + def _insert_optionals( # type: ignore[override] + self, children: List, indexes: Optional[List[int]] = None + ): + """Insert None placeholders, handling optional condition.""" condition = None for child in children: @@ -168,23 +181,23 @@ def _insert_optionals(self, children: List, indexes: List[int] = None): @property def for_intro(self) -> ForIntroRule: - """Returns the for intro rule""" + """Return the for intro rule.""" return self._children[2] @property def value_expr(self) -> ExpressionRule: - """Returns the value expression""" + """Return the value expression.""" return self._children[4] @property def condition(self) -> Optional[ForCondRule]: - """Returns the optional condition rule""" + """Return the optional condition rule.""" return self._children[6] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - + """Serialize to '[for ... 
: expr]' string.""" result = "[" with context.modify(inside_dollar_string=True): @@ -203,7 +216,7 @@ def serialize( class ForObjectExprRule(ExpressionRule): """Rule for object for expressions: {for key, value in items : key => value}""" - _children: Tuple[ + _children_layout: Tuple[ LBRACE, Optional[NewLineOrCommentRule], ForIntroRule, @@ -222,18 +235,26 @@ class ForObjectExprRule(ExpressionRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "for_object_expr" def __init__(self, children, meta: Optional[Meta] = None): self._insert_optionals(children) super().__init__(children, meta) - def _insert_optionals(self, children: List, indexes: List[int] = None): + def _insert_optionals( # type: ignore[override] + self, children: List, indexes: Optional[List[int]] = None + ): + """Insert None placeholders, handling optional ellipsis and condition.""" ellipsis_ = None condition = None for child in children: - if ellipsis_ is None and isinstance(child, ELLIPSIS): + if ( + ellipsis_ is None + and isinstance(child, StaticStringToken) + and child.lark_name() == "ELLIPSIS" + ): ellipsis_ = child if condition is None and isinstance(child, ForCondRule): condition = child @@ -252,33 +273,33 @@ def _insert_optionals(self, children: List, indexes: List[int] = None): @property def for_intro(self) -> ForIntroRule: - """Returns the for intro rule""" + """Return the for intro rule.""" return self._children[2] @property def key_expr(self) -> ExpressionRule: - """Returns the key expression""" + """Return the key expression.""" return self._children[4] @property def value_expr(self) -> ExpressionRule: - """Returns the value expression""" + """Return the value expression.""" return self._children[7] @property - def ellipsis(self) -> Optional[ELLIPSIS]: - """Returns the optional ellipsis token""" + def ellipsis(self): + """Return the optional ellipsis token.""" return self._children[9] @property def condition(self) -> Optional[ForCondRule]: - """Returns 
the optional condition rule""" + """Return the optional condition rule.""" return self._children[11] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - + """Serialize to '{for ... : key => value}' string.""" result = "{" with context.modify(inside_dollar_string=True): result += self.for_intro.serialize(options, context) diff --git a/hcl2/rules/functions.py b/hcl2/rules/functions.py index 49b20f65..e9722965 100644 --- a/hcl2/rules/functions.py +++ b/hcl2/rules/functions.py @@ -1,4 +1,5 @@ -from functools import lru_cache +"""Rule classes for HCL2 function calls and arguments.""" + from typing import Any, Optional, Tuple, Union, List from hcl2.rules.expressions import ExpressionRule @@ -16,8 +17,9 @@ class ArgumentsRule(InlineCommentMixIn): + """Rule for a comma-separated list of function arguments.""" - _children: Tuple[ + _children_layout: Tuple[ ExpressionRule, Tuple[ Optional[NewLineOrCommentRule], @@ -32,11 +34,12 @@ class ArgumentsRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "arguments" @property - @lru_cache(maxsize=None) def has_ellipsis(self) -> bool: + """Return whether the argument list ends with an ellipsis (...).""" for child in self._children[-2:]: if isinstance(child, StringToken) and child.lark_name() == "ELLIPSIS": return True @@ -44,13 +47,15 @@ def has_ellipsis(self) -> bool: @property def arguments(self) -> List[ExpressionRule]: + """Return the list of expression arguments.""" return [child for child in self._children if isinstance(child, ExpressionRule)] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a comma-separated argument string.""" result = ", ".join( - [str(argument.serialize(options, context)) for argument in self.arguments] + str(argument.serialize(options, context)) for argument in self.arguments ) if self.has_ellipsis: result += " ..." 
@@ -58,8 +63,9 @@ def serialize( class FunctionCallRule(InlineCommentMixIn): + """Rule for function call expressions (e.g. func(args)).""" - _children: Tuple[ + _children_layout: Tuple[ IdentifierRule, Optional[IdentifierRule], Optional[IdentifierRule], @@ -72,26 +78,34 @@ class FunctionCallRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "function_call" @property - @lru_cache(maxsize=None) def identifiers(self) -> List[IdentifierRule]: + """Return the function name identifier(s).""" return [child for child in self._children if isinstance(child, IdentifierRule)] @property - @lru_cache(maxsize=None) def arguments(self) -> Optional[ArgumentsRule]: + """Return the arguments rule, or None if no arguments.""" for child in self._children: if isinstance(child, ArgumentsRule): return child + return None def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to 'func(args)' string.""" with context.modify(inside_dollar_string=True): - result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" - result += f"({self.arguments.serialize(options, context) if self.arguments else ''})" + name = "::".join( + identifier.serialize(options, context) + for identifier in self.identifiers + ) + args = self.arguments + args_str = args.serialize(options, context) if args else "" + result = f"{name}({args_str})" if not context.inside_dollar_string: result = to_dollar_string(result) @@ -100,7 +114,9 @@ def serialize( class ProviderFunctionCallRule(FunctionCallRule): - _children: Tuple[ + """Rule for provider-namespaced function calls.""" + + _children_layout: Tuple[ IdentifierRule, IdentifierRule, IdentifierRule, @@ -113,4 +129,5 @@ class ProviderFunctionCallRule(FunctionCallRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "provider_function_call" diff --git a/hcl2/rules/indexing.py 
b/hcl2/rules/indexing.py index fc8cbf90..5cfefb96 100644 --- a/hcl2/rules/indexing.py +++ b/hcl2/rules/indexing.py @@ -1,3 +1,5 @@ +"""Rule classes for HCL2 indexing, attribute access, and splat expressions.""" + from typing import List, Optional, Tuple, Any, Union from lark.tree import Meta @@ -24,28 +26,34 @@ class ShortIndexRule(LarkRule): + """Rule for dot-numeric index access (e.g. .0).""" - _children: Tuple[ + _children_layout: Tuple[ DOT, IntLiteral, ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "short_index" @property def index(self): + """Return the index token.""" return self.children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '.N' string.""" return f".{self.index.serialize(options)}" class SqbIndexRule(InlineCommentMixIn): - _children: Tuple[ + """Rule for square-bracket index access (e.g. [expr]).""" + + _children_layout: Tuple[ LSQB, Optional[NewLineOrCommentRule], ExprTermRule, @@ -55,15 +63,18 @@ class SqbIndexRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "braces_index" @property def index_expression(self): + """Return the index expression inside the brackets.""" return self.children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '[expr]' string.""" return f"[{self.index_expression.serialize(options)}]" def __init__(self, children, meta: Optional[Meta] = None): @@ -72,118 +83,146 @@ def __init__(self, children, meta: Optional[Meta] = None): class IndexExprTermRule(ExpressionRule): + """Rule for index access on an expression term.""" - _children: Tuple[ExprTermRule, SqbIndexRule] + _children_layout: Tuple[ExprTermRule, SqbIndexRule] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "index_expr_term" def serialize( self, options=SerializationOptions(), context=SerializationContext() 
) -> Any: + """Serialize to 'expr[index]' string.""" with context.modify(inside_dollar_string=True): - result = f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + expr = self.children[0].serialize(options) + index = self.children[1].serialize(options) + result = f"{expr}{index}" if not context.inside_dollar_string: result = to_dollar_string(result) return result class GetAttrRule(LarkRule): + """Rule for dot-attribute access (e.g. .name).""" - _children: Tuple[ + _children_layout: Tuple[ DOT, IdentifierRule, ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "get_attr" @property def identifier(self) -> IdentifierRule: + """Return the accessed identifier.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '.identifier' string.""" return f".{self.identifier.serialize(options, context)}" class GetAttrExprTermRule(ExpressionRule): + """Rule for attribute access on an expression term.""" - _children: Tuple[ + _children_layout: Tuple[ ExprTermRule, GetAttrRule, ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "get_attr_expr_term" @property def expr_term(self) -> ExprTermRule: + """Return the base expression term.""" return self._children[0] @property def get_attr(self) -> GetAttrRule: + """Return the attribute access rule.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to 'expr.attr' string.""" with context.modify(inside_dollar_string=True): - result = f"{self.expr_term.serialize(options, context)}{self.get_attr.serialize(options, context)}" + expr = self.expr_term.serialize(options, context) + attr = self.get_attr.serialize(options, context) + result = f"{expr}{attr}" if not context.inside_dollar_string: result = to_dollar_string(result) return result class AttrSplatRule(LarkRule): - _children: 
Tuple[ + """Rule for attribute splat expressions (e.g. .*.attr).""" + + _children_layout: Tuple[ ATTR_SPLAT, Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "attr_splat" @property def get_attrs( self, - ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + ) -> List[Union[GetAttrRule, SqbIndexRule, ShortIndexRule]]: + """Return the trailing accessor chain.""" return self._children[1:] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '.*...' string.""" return ".*" + "".join( get_attr.serialize(options, context) for get_attr in self.get_attrs ) class AttrSplatExprTermRule(ExpressionRule): + """Rule for attribute splat on an expression term.""" - _children: Tuple[ExprTermRule, AttrSplatRule] + _children_layout: Tuple[ExprTermRule, AttrSplatRule] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "attr_splat_expr_term" @property def expr_term(self) -> ExprTermRule: + """Return the base expression term.""" return self._children[0] @property def attr_splat(self) -> AttrSplatRule: + """Return the attribute splat rule.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to 'expr.*...' string.""" with context.modify(inside_dollar_string=True): - result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + expr = self.expr_term.serialize(options, context) + splat = self.attr_splat.serialize(options, context) + result = f"{expr}{splat}" if not context.inside_dollar_string: result = to_dollar_string(result) @@ -191,49 +230,62 @@ def serialize( class FullSplatRule(LarkRule): - _children: Tuple[ + """Rule for full splat expressions (e.g. 
[*].attr).""" + + _children_layout: Tuple[ ATTR_SPLAT, Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "full_splat" @property def get_attrs( self, - ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + ) -> List[Union[GetAttrRule, SqbIndexRule, ShortIndexRule]]: + """Return the trailing accessor chain.""" return self._children[1:] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '[*]...' string.""" return "[*]" + "".join( get_attr.serialize(options, context) for get_attr in self.get_attrs ) class FullSplatExprTermRule(ExpressionRule): - _children: Tuple[ExprTermRule, FullSplatRule] + """Rule for full splat on an expression term.""" + + _children_layout: Tuple[ExprTermRule, FullSplatRule] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "full_splat_expr_term" @property def expr_term(self) -> ExprTermRule: + """Return the base expression term.""" return self._children[0] @property def attr_splat(self) -> FullSplatRule: + """Return the full splat rule.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to 'expr[*]...' 
string.""" with context.modify(inside_dollar_string=True): - result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + expr = self.expr_term.serialize(options, context) + splat = self.attr_splat.serialize(options, context) + result = f"{expr}{splat}" if not context.inside_dollar_string: result = to_dollar_string(result) diff --git a/hcl2/rules/literal_rules.py b/hcl2/rules/literal_rules.py index cb8396b4..1db333f5 100644 --- a/hcl2/rules/literal_rules.py +++ b/hcl2/rules/literal_rules.py @@ -1,3 +1,5 @@ +"""Rule classes for literal values (keywords, identifiers, numbers, operators).""" + from abc import ABC from typing import Any, Tuple @@ -6,49 +8,69 @@ class TokenRule(LarkRule, ABC): + """Base rule wrapping a single token child.""" - _children: Tuple[LarkToken] + _children_layout: Tuple[LarkToken] @property def token(self) -> LarkToken: + """Return the single token child.""" return self._children[0] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize by delegating to the token's own serialization.""" return self.token.serialize() class KeywordRule(TokenRule): + """Rule for HCL2 keyword literals (true, false, null).""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "keyword" class IdentifierRule(TokenRule): + """Rule for HCL2 identifiers.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "identifier" class IntLitRule(TokenRule): + """Rule for integer literal expressions.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "int_lit" class FloatLitRule(TokenRule): + """Rule for floating-point literal expressions.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "float_lit" def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize, preserving scientific notation when 
configured.""" value = self.token.value # Scientific notation (e.g. 1.23e5) cannot survive a Python float() # round-trip, so preserve it as a ${...} expression string. - if options.preserve_scientific_notation and isinstance(value, str) and "e" in value.lower(): + if ( + options.preserve_scientific_notation + and isinstance(value, str) + and "e" in value.lower() + ): if context.inside_dollar_string: return value return to_dollar_string(value) @@ -56,6 +78,9 @@ def serialize( class BinaryOperatorRule(TokenRule): + """Rule for binary operator tokens.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "binary_operator" diff --git a/hcl2/rules/strings.py b/hcl2/rules/strings.py index b757f317..c56e6e79 100644 --- a/hcl2/rules/strings.py +++ b/hcl2/rules/strings.py @@ -1,3 +1,5 @@ +"""Rule classes for HCL2 string literals, interpolation, and heredoc templates.""" + import sys from typing import Tuple, List, Any, Union @@ -9,21 +11,22 @@ DBLQUOTE, STRING_CHARS, ESCAPED_INTERPOLATION, - HEREDOC_TEMPLATE, + HEREDOC_TEMPLATE, HEREDOC_TRIM_TEMPLATE, ) from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, - HEREDOC_TRIM_PATTERN, + HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN, ) class InterpolationRule(LarkRule): + """Rule for ${expression} interpolation within strings.""" - _children: Tuple[ + _children_layout: Tuple[ INTERP_START, ExpressionRule, RBRACE, @@ -31,78 +34,97 @@ class InterpolationRule(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "interpolation" @property def expression(self): + """Return the interpolated expression.""" return self.children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to ${expression} string.""" return to_dollar_string(self.expression.serialize(options)) class StringPartRule(LarkRule): - _children: Tuple[Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]] + 
"""Rule for a single part of a string (literal text, escape, or interpolation).""" + + _children_layout: Tuple[ + Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule] + ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "string_part" @property def content(self) -> Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]: + """Return the content element (string chars, escape, or interpolation).""" return self._children[0] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize this string part.""" return self.content.serialize(options, context) class StringRule(LarkRule): + """Rule for quoted string literals.""" - _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] + _children_layout: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "string" @property def string_parts(self): + """Return the list of string parts between quotes.""" return self.children[1:-1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a quoted string.""" return '"' + "".join(part.serialize() for part in self.string_parts) + '"' class HeredocTemplateRule(LarkRule): - - _children: Tuple[HEREDOC_TEMPLATE] + """Rule for heredoc template strings (< str: + """Return the grammar rule name.""" return "heredoc_template" - + @property def heredoc(self): + """Return the raw heredoc token.""" return self.children[0] - + def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize the heredoc, optionally stripping to a plain string.""" heredoc = self.heredoc.serialize(options, context) - + if not options.preserve_heredocs: match = HEREDOC_PATTERN.match(heredoc) if not match: raise RuntimeError(f"Invalid Heredoc token: {heredoc}") heredoc = match.group(2).rstrip(self._trim_chars) - heredoc = heredoc.replace('\\', 
'\\\\').replace('"', '\\"').replace('\n', '\\n') + heredoc = ( + heredoc.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") + ) return f'"{heredoc}"' result = heredoc.rstrip(self._trim_chars) @@ -110,16 +132,19 @@ def serialize( class HeredocTrimTemplateRule(HeredocTemplateRule): + """Rule for indented heredoc template strings (<<-MARKER).""" + + _children_layout: Tuple[HEREDOC_TRIM_TEMPLATE] - _children: Tuple[HEREDOC_TRIM_TEMPLATE] - @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "heredoc_trim_template" - + def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize the trim heredoc, stripping common leading whitespace.""" # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions # This is a special version of heredocs that are declared with "<<-" # This will calculate the minimum number of leading spaces in each line of a heredoc @@ -146,8 +171,7 @@ def serialize( lines = [line[min_spaces:] for line in lines] if not options.preserve_heredocs: - lines = [line.replace('\\', '\\\\').replace('"', '\\"') for line in lines] + lines = [line.replace("\\", "\\\\").replace('"', '\\"') for line in lines] sep = "\\n" if not options.preserve_heredocs else "\n" return '"' + sep.join(lines) + '"' - \ No newline at end of file diff --git a/hcl2/rules/tokens.py b/hcl2/rules/tokens.py index 06d1611f..e648bc14 100644 --- a/hcl2/rules/tokens.py +++ b/hcl2/rules/tokens.py @@ -1,5 +1,7 @@ +"""Token classes for terminal elements in the LarkElement tree.""" + from functools import lru_cache -from typing import Callable, Any, Type, Optional, Tuple +from typing import Callable, Any, Dict, Type, Optional, Tuple, Union from hcl2.rules.abstract import LarkToken @@ -24,28 +26,31 @@ def __build_subclass(cls, name: str) -> Type["StringToken"]: ) def __class_getitem__(cls, name: str) -> Type["StringToken"]: + """Return a cached subclass keyed by the given grammar 
token name.""" if not isinstance(name, str): raise TypeError("StringToken[...] expects a single str argument") return cls.__build_subclass(name) - def __init__(self, value: Optional[Any] = None): - super().__init__(value) + def __init__(self, value: Optional[Union[str, int, float]] = None): + super().__init__(value) # type: ignore[arg-type] @property def serialize_conversion(self) -> Callable[[Any], str]: + """Return str as the conversion callable.""" return str class StaticStringToken(StringToken): + """A StringToken subclass with a fixed default value set at class-creation time.""" - classes_by_value = {} + classes_by_value: Dict[Optional[str], Type["StringToken"]] = {} @classmethod @lru_cache(maxsize=None) def __build_subclass( - cls, name: str, default_value: str = None + cls, name: str, default_value: Optional[str] = None ) -> Type["StringToken"]: - """Create a subclass with a constant `lark_name`.""" + """Create a subclass with a constant `lark_name` and default value.""" result = type( # type: ignore f"{name}_TOKEN", @@ -59,65 +64,83 @@ def __build_subclass( cls.classes_by_value[default_value] = result return result - def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StringToken"]: - name, default_value = value - return cls.__build_subclass(name, default_value) + def __class_getitem__( # type: ignore[override] + cls, name: Tuple[str, str] + ) -> Type["StringToken"]: + """Return a cached subclass keyed by a (token_name, default_value) tuple.""" + token_name, default_value = name + return cls.__build_subclass(token_name, default_value) def __init__(self): super().__init__(getattr(self, "_default_value")) @property def serialize_conversion(self) -> Callable[[Any], str]: + """Return str as the conversion callable.""" return str -# explicitly define various kinds of string-based tokens for type hinting +# Explicitly define various kinds of string-based tokens for type hinting. 
+# mypy cannot follow the dynamic __class_getitem__ pattern, so every alias +# in this block carries a blanket ``type: ignore``. +# pylint: disable=invalid-name + # variable values -NAME = StringToken["NAME"] -STRING_CHARS = StringToken["STRING_CHARS"] -ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] -BINARY_OP = StringToken["BINARY_OP"] -HEREDOC_TEMPLATE = StringToken["HEREDOC_TEMPLATE"] -HEREDOC_TRIM_TEMPLATE = StringToken["HEREDOC_TRIM_TEMPLATE"] -NL_OR_COMMENT = StringToken["NL_OR_COMMENT"] +NAME = StringToken["NAME"] # type: ignore +STRING_CHARS = StringToken["STRING_CHARS"] # type: ignore +ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] # type: ignore +BINARY_OP = StringToken["BINARY_OP"] # type: ignore +HEREDOC_TEMPLATE = StringToken["HEREDOC_TEMPLATE"] # type: ignore +HEREDOC_TRIM_TEMPLATE = StringToken["HEREDOC_TRIM_TEMPLATE"] # type: ignore +NL_OR_COMMENT = StringToken["NL_OR_COMMENT"] # type: ignore # static values -EQ = StaticStringToken[("EQ", "=")] -COLON = StaticStringToken[("COLON", ":")] -LPAR = StaticStringToken[("LPAR", "(")] -RPAR = StaticStringToken[("RPAR", ")")] -LBRACE = StaticStringToken[("LBRACE", "{")] -RBRACE = StaticStringToken[("RBRACE", "}")] -DOT = StaticStringToken[("DOT", ".")] -COMMA = StaticStringToken[("COMMA", ",")] -ELLIPSIS = StaticStringToken[("ELLIPSIS", "...")] -QMARK = StaticStringToken[("QMARK", "?")] -LSQB = StaticStringToken[("LSQB", "[")] -RSQB = StaticStringToken[("RSQB", "]")] -INTERP_START = StaticStringToken[("INTERP_START", "${")] -DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] -ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] -FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] -FOR = StaticStringToken[("FOR", "for")] -IN = StaticStringToken[("IN", "in")] -IF = StaticStringToken[("IF", "if")] -FOR_OBJECT_ARROW = StaticStringToken[("FOR_OBJECT_ARROW", "=>")] +EQ = StaticStringToken[("EQ", "=")] # type: ignore +COLON = StaticStringToken[("COLON", ":")] # type: ignore +LPAR = 
StaticStringToken[("LPAR", "(")] # type: ignore +RPAR = StaticStringToken[("RPAR", ")")] # type: ignore +LBRACE = StaticStringToken[("LBRACE", "{")] # type: ignore +RBRACE = StaticStringToken[("RBRACE", "}")] # type: ignore +DOT = StaticStringToken[("DOT", ".")] # type: ignore +COMMA = StaticStringToken[("COMMA", ",")] # type: ignore +ELLIPSIS = StaticStringToken[("ELLIPSIS", "...")] # type: ignore +QMARK = StaticStringToken[("QMARK", "?")] # type: ignore +LSQB = StaticStringToken[("LSQB", "[")] # type: ignore +RSQB = StaticStringToken[("RSQB", "]")] # type: ignore +INTERP_START = StaticStringToken[("INTERP_START", "${")] # type: ignore +DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] # type: ignore +ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] # type: ignore +FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] # type: ignore +FOR = StaticStringToken[("FOR", "for")] # type: ignore +IN = StaticStringToken[("IN", "in")] # type: ignore +IF = StaticStringToken[("IF", "if")] # type: ignore +FOR_OBJECT_ARROW = StaticStringToken[("FOR_OBJECT_ARROW", "=>")] # type: ignore + +# pylint: enable=invalid-name class IntLiteral(LarkToken): + """Token for integer literal values.""" + @staticmethod def lark_name() -> str: + """Return the grammar token name.""" return "INT_LITERAL" @property def serialize_conversion(self) -> Callable: + """Return int as the conversion callable.""" return int class FloatLiteral(LarkToken): + """Token for floating-point literal values.""" + @staticmethod def lark_name() -> str: + """Return the grammar token name.""" return "FLOAT_LITERAL" @property def serialize_conversion(self) -> Callable: + """Return float as the conversion callable.""" return float diff --git a/hcl2/rules/whitespace.py b/hcl2/rules/whitespace.py index 5f2fa886..540845d7 100644 --- a/hcl2/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -1,28 +1,37 @@ +"""Rule classes for whitespace, comments, and inline comment handling.""" + from abc import ABC -from typing import 
Optional, List, Any, Tuple +from typing import Optional, List, Any -from hcl2.rules.abstract import LarkToken, LarkRule +from hcl2.rules.abstract import LarkRule from hcl2.rules.literal_rules import TokenRule +from hcl2.rules.tokens import NL_OR_COMMENT from hcl2.utils import SerializationOptions, SerializationContext class NewLineOrCommentRule(TokenRule): + """Rule for newline and comment tokens.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "new_line_or_comment" @classmethod def from_string(cls, string: str) -> "NewLineOrCommentRule": - return cls([LarkToken("NL_OR_COMMENT", string)]) + """Create an instance from a raw comment or newline string.""" + return cls([NL_OR_COMMENT(string)]) # type: ignore[abstract] # pylint: disable=abstract-class-instantiated def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to the raw comment/newline string.""" return self.token.serialize() def to_list( self, options: SerializationOptions = SerializationOptions() ) -> Optional[List[str]]: + """Extract comment text strings, or None if only a newline.""" comment = self.serialize(options) if comment == "\n": return None @@ -48,8 +57,11 @@ def to_list( class InlineCommentMixIn(LarkRule, ABC): - def _insert_optionals(self, children: List, indexes: List[int] = None): - for index in indexes: + """Mixin for rules that may contain inline comments among their children.""" + + def _insert_optionals(self, children: List, indexes: Optional[List[int]] = None): + """Insert None placeholders at expected optional-child positions.""" + for index in indexes: # type: ignore[union-attr] try: child = children[index] except IndexError: @@ -59,6 +71,7 @@ def _insert_optionals(self, children: List, indexes: List[int] = None): children.insert(index, None) def inline_comments(self): + """Collect all inline comment strings from this rule's children.""" result = [] for child in self._children: From 
72078f07e9cdd61e6cf628d850874926761eb058 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 20:42:05 +0100 Subject: [PATCH 35/45] fix pre-commit errors --- .coveragerc | 2 +- .github/ISSUE_TEMPLATE/hcl2-parsing-error.md | 14 +- .pre-commit-config.yaml | 1 + cli/hcl_to_json.py | 27 +- cli/helpers.py | 6 +- cli/json_to_hcl.py | 26 +- hcl2/api.py | 35 +- hcl2/deserializer.py | 51 +-- hcl2/formatter.py | 46 ++- hcl2/reconstructor.py | 35 +- hcl2/rules/abstract.py | 8 +- hcl2/rules/tokens.py | 12 +- hcl2/rules/whitespace.py | 4 +- hcl2/transformer.py | 12 +- hcl2/utils.py | 11 + pylintrc | 2 +- test/integration/hcl2_original/smoke.tf | 2 +- test/integration/test_round_trip.py | 15 +- test/integration/test_specialized.py | 14 +- test/unit/cli/test_hcl_to_json.py | 16 +- test/unit/cli/test_helpers.py | 47 ++- test/unit/cli/test_json_to_hcl.py | 16 +- test/unit/rules/test_abstract.py | 3 +- test/unit/rules/test_base.py | 11 +- test/unit/rules/test_containers.py | 1 + test/unit/rules/test_expressions.py | 8 +- test/unit/rules/test_for_expressions.py | 337 +++++++++++-------- test/unit/rules/test_functions.py | 1 + test/unit/rules/test_literal_rules.py | 1 + test/unit/rules/test_strings.py | 9 +- test/unit/rules/test_tokens.py | 3 +- test/unit/rules/test_whitespace.py | 9 +- test/unit/test_api.py | 51 ++- test/unit/test_builder.py | 6 +- test/unit/test_deserializer.py | 16 +- test/unit/test_formatter.py | 95 +++--- test/unit/test_utils.py | 1 + 37 files changed, 561 insertions(+), 393 deletions(-) diff --git a/.coveragerc b/.coveragerc index 558bc244..89ef860b 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,7 +3,7 @@ branch = true omit = hcl2/lark_parser.py hcl2/version.py - hcl2/__main__.py + hcl2/__main__.py hcl2/__init__.py hcl2/rules/__init__.py cli/__init__.py diff --git a/.github/ISSUE_TEMPLATE/hcl2-parsing-error.md b/.github/ISSUE_TEMPLATE/hcl2-parsing-error.md index 4837d3ff..1b526e9a 100644 --- a/.github/ISSUE_TEMPLATE/hcl2-parsing-error.md +++ 
b/.github/ISSUE_TEMPLATE/hcl2-parsing-error.md @@ -1,27 +1,31 @@ ---- +______________________________________________________________________ + name: HCL2 parsing error about: Template for reporting a bug related to parsing HCL2 code title: '' labels: bug assignees: kkozik-amplify ---- +______________________________________________________________________ **Describe the bug** A clear and concise description of what the bug is. **Software:** - - OS: [macOS / Windows / Linux] - - Python version (e.g. 3.9.21) - - python-hcl2 version (e.g. 7.0.0) + +- OS: \[macOS / Windows / Linux\] +- Python version (e.g. 3.9.21) +- python-hcl2 version (e.g. 7.0.0) **Snippet of HCL2 code causing the unexpected behaviour:** + ```terraform locals { foo = "bar" } ``` + **Expected behavior** A clear and concise description of what you expected to happen, e.g. python dictionary or JSON you expected to receive as a result of parsing. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 11b63555..ef43294d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,6 +6,7 @@ repos: rev: v4.3.0 hooks: - id: trailing-whitespace + exclude: ^test/integration/(hcl2_reconstructed|specialized)/ - id: end-of-file-fixer - id: check-added-large-files - id: no-commit-to-branch # Prevent commits directly to master diff --git a/cli/hcl_to_json.py b/cli/hcl_to_json.py index faa9fb33..d4acf0e5 100644 --- a/cli/hcl_to_json.py +++ b/cli/hcl_to_json.py @@ -2,16 +2,24 @@ import argparse import json import os -from typing import IO +from typing import IO, Optional, TextIO from hcl2 import load from hcl2.utils import SerializationOptions from hcl2.version import __version__ -from .helpers import HCL_SKIPPABLE, _convert_single_file, _convert_directory, _convert_stdin +from .helpers import ( + HCL_SKIPPABLE, + _convert_single_file, + _convert_directory, + _convert_stdin, +) def _hcl_to_json( - in_file: IO, out_file: IO, options: SerializationOptions, json_indent: int = None, + in_file: 
TextIO, + out_file: IO, + options: SerializationOptions, + json_indent: Optional[int] = None, ) -> None: data = load(in_file, serialization_options=options) json.dump(data, out_file, indent=json_indent) @@ -108,11 +116,18 @@ def convert(in_file, out_file): if args.PATH == "-": _convert_stdin(convert) elif os.path.isfile(args.PATH): - _convert_single_file(args.PATH, args.OUT_PATH, convert, args.skip, HCL_SKIPPABLE) + _convert_single_file( + args.PATH, args.OUT_PATH, convert, args.skip, HCL_SKIPPABLE + ) elif os.path.isdir(args.PATH): _convert_directory( - args.PATH, args.OUT_PATH, convert, args.skip, HCL_SKIPPABLE, - in_extensions={".tf", ".hcl"}, out_extension=".json", + args.PATH, + args.OUT_PATH, + convert, + args.skip, + HCL_SKIPPABLE, + in_extensions={".tf", ".hcl"}, + out_extension=".json", ) else: raise RuntimeError("Invalid Path", args.PATH) diff --git a/cli/helpers.py b/cli/helpers.py index bef6ba6f..6d463d45 100644 --- a/cli/helpers.py +++ b/cli/helpers.py @@ -2,7 +2,7 @@ import json import os import sys -from typing import Callable, IO, Set, Tuple +from typing import Callable, IO, Set, Tuple, Type from lark import UnexpectedCharacters, UnexpectedToken @@ -16,7 +16,7 @@ def _convert_single_file( out_path: str, convert_fn: Callable[[IO, IO], None], skip: bool, - skippable: Tuple[type, ...], + skippable: Tuple[Type[BaseException], ...], ) -> None: with open(in_path, "r", encoding="utf-8") as in_file: print(in_path, file=sys.stderr, flush=True) @@ -43,7 +43,7 @@ def _convert_directory( out_path: str, convert_fn: Callable[[IO, IO], None], skip: bool, - skippable: Tuple[type, ...], + skippable: Tuple[Type[BaseException], ...], in_extensions: Set[str], out_extension: str, ) -> None: diff --git a/cli/json_to_hcl.py b/cli/json_to_hcl.py index 48ade7c0..65caeb09 100644 --- a/cli/json_to_hcl.py +++ b/cli/json_to_hcl.py @@ -2,18 +2,23 @@ import argparse import json import os -from typing import IO +from typing import TextIO from hcl2 import dump from 
hcl2.deserializer import DeserializerOptions from hcl2.formatter import FormatterOptions from hcl2.version import __version__ -from .helpers import JSON_SKIPPABLE, _convert_single_file, _convert_directory, _convert_stdin +from .helpers import ( + JSON_SKIPPABLE, + _convert_single_file, + _convert_directory, + _convert_stdin, +) def _json_to_hcl( - in_file: IO, - out_file: IO, + in_file: TextIO, + out_file: TextIO, d_opts: DeserializerOptions, f_opts: FormatterOptions, ) -> None: @@ -114,11 +119,18 @@ def convert(in_file, out_file): if args.PATH == "-": _convert_stdin(convert) elif os.path.isfile(args.PATH): - _convert_single_file(args.PATH, args.OUT_PATH, convert, args.skip, JSON_SKIPPABLE) + _convert_single_file( + args.PATH, args.OUT_PATH, convert, args.skip, JSON_SKIPPABLE + ) elif os.path.isdir(args.PATH): _convert_directory( - args.PATH, args.OUT_PATH, convert, args.skip, JSON_SKIPPABLE, - in_extensions={".json"}, out_extension=".tf", + args.PATH, + args.OUT_PATH, + convert, + args.skip, + JSON_SKIPPABLE, + in_extensions={".json"}, + out_extension=".tf", ) else: raise RuntimeError("Invalid Path", args.PATH) diff --git a/hcl2/api.py b/hcl2/api.py index 0238f418..db4caa72 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -64,7 +64,13 @@ def dump( :param deserializer_options: Options controlling deserialization behavior. :param formatter_options: Options controlling formatting behavior. """ - file.write(dumps(data, deserializer_options=deserializer_options, formatter_options=formatter_options)) + file.write( + dumps( + data, + deserializer_options=deserializer_options, + formatter_options=formatter_options, + ) + ) def dumps( @@ -79,7 +85,11 @@ def dumps( :param deserializer_options: Options controlling deserialization behavior. :param formatter_options: Options controlling formatting behavior. 
""" - tree = from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options) + tree = from_dict( + data, + deserializer_options=deserializer_options, + formatter_options=formatter_options, + ) return reconstruct(tree) @@ -135,18 +145,18 @@ def from_dict( *, deserializer_options: Optional[DeserializerOptions] = None, formatter_options: Optional[FormatterOptions] = None, - format: bool = True, + apply_format: bool = True, ) -> StartRule: """Convert a Python dict into a LarkElement tree. :param data: Python dict (as produced by :func:`load`). :param deserializer_options: Options controlling deserialization behavior. :param formatter_options: Options controlling formatting behavior. - :param format: If True (default), apply formatting to the tree. + :param apply_format: If True (default), apply formatting to the tree. """ deserializer = BaseDeserializer(deserializer_options) tree = deserializer.load_python(data) - if format: + if apply_format: formatter = BaseFormatter(formatter_options) formatter.format_tree(tree) return tree @@ -157,17 +167,22 @@ def from_json( *, deserializer_options: Optional[DeserializerOptions] = None, formatter_options: Optional[FormatterOptions] = None, - format: bool = True, + apply_format: bool = True, ) -> StartRule: """Convert a JSON string into a LarkElement tree. :param text: JSON string. :param deserializer_options: Options controlling deserialization behavior. :param formatter_options: Options controlling formatting behavior. - :param format: If True (default), apply formatting to the tree. + :param apply_format: If True (default), apply formatting to the tree. 
""" data = _json.loads(text) - return from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options, format=format) + return from_dict( + data, + deserializer_options=deserializer_options, + formatter_options=formatter_options, + apply_format=apply_format, + ) def reconstruct(tree) -> str: @@ -187,7 +202,9 @@ def transform(lark_tree: Tree, *, discard_comments: bool = False) -> StartRule: :param lark_tree: Raw Lark tree from :func:`parse_to_tree` or :func:`parse_string_to_tree`. :param discard_comments: If True, discard comments during transformation. """ - return RuleTransformer(discard_new_line_or_comments=discard_comments).transform(lark_tree) + return RuleTransformer(discard_new_line_or_comments=discard_comments).transform( + lark_tree + ) def serialize( diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 328427db..03dac5d8 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -1,9 +1,10 @@ +"""Deserialize Python dicts (or JSON) into LarkElement trees.""" import json import re from abc import ABC, abstractmethod from dataclasses import dataclass from functools import cached_property -from typing import Any, TextIO, List, Union +from typing import Any, TextIO, List, Optional, Union from regex import regex @@ -62,34 +63,40 @@ @dataclass class DeserializerOptions: + """Options controlling how Python dicts are deserialized into LarkElement trees.""" + heredocs_to_strings: bool = False strings_to_heredocs: bool = False object_elements_colon: bool = False object_elements_trailing_comma: bool = True - # with_comments: bool = False # TODO + # with_comments: bool = False # TODO class LarkElementTreeDeserializer(ABC): - def __init__(self, options: DeserializerOptions = None): + """Abstract base for deserializers that produce LarkElement trees.""" + + def __init__(self, options: Optional[DeserializerOptions] = None): self.options = options or DeserializerOptions() @abstractmethod def loads(self, value: str) -> 
LarkElement: + """Deserialize a JSON string into a LarkElement tree.""" raise NotImplementedError() def load(self, file: TextIO) -> LarkElement: + """Deserialize a JSON file into a LarkElement tree.""" return self.loads(file.read()) class BaseDeserializer(LarkElementTreeDeserializer): - def __init__(self, options=None): - super().__init__(options) + """Default deserializer: Python dict/JSON → LarkElement tree.""" @cached_property def _transformer(self) -> RuleTransformer: return RuleTransformer() - def load_python(self, value: Any) -> LarkElement: + def load_python(self, value: Any) -> StartRule: + """Deserialize a Python object into a StartRule tree.""" if isinstance(value, dict): # Top-level dict is always a body (attributes + blocks), not an object children = self._deserialize_block_elements(value) @@ -99,13 +106,14 @@ def load_python(self, value: Any) -> LarkElement: return result def loads(self, value: str) -> LarkElement: + """Deserialize a JSON string into a LarkElement tree.""" return self.load_python(json.loads(value)) def _deserialize(self, value: Any) -> LarkElement: if isinstance(value, dict): if self._contains_block_marker(value): - children = [] + children: List[Any] = [] block_elements = self._deserialize_block_elements(value) for element in block_elements: @@ -120,8 +128,8 @@ def _deserialize(self, value: Any) -> LarkElement: return self._deserialize_text(value) - def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: - children = [] + def _deserialize_block_elements(self, value: dict) -> List[LarkElement]: + children: List[LarkElement] = [] for key, val in value.items(): if self._is_block(val): # this value is a list of blocks, iterate over each block and deserialize them @@ -135,6 +143,7 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: return children + # pylint: disable=R0911 def _deserialize_text(self, value: Any) -> LarkRule: # bool must be checked before int since bool is a subclass of int if 
isinstance(value, bool): @@ -160,7 +169,7 @@ def _deserialize_text(self, value: Any) -> LarkRule: if self.options.strings_to_heredocs: inner = value[1:-1] - if '\\n' in inner: + if "\\n" in inner: return self._deserialize_string_as_heredoc(inner) return self._deserialize_string(value) @@ -192,8 +201,8 @@ def _deserialize_string(self, value: str) -> StringRule: if part.endswith('"'): part = part[:-1] - e = self._deserialize_string_part(part) - result.append(e) + string_part = self._deserialize_string_part(part) + result.append(string_part) return StringRule([DBLQUOTE(), *result, DBLQUOTE()]) @@ -224,7 +233,7 @@ def _deserialize_string_as_heredoc(self, inner: str) -> HeredocTemplateRule: # Single-pass unescape: \\n → \n, \\" → ", \\\\ → \ content = re.sub( r'\\(n|"|\\)', - lambda m: '\n' if m.group(1) == 'n' else m.group(1), + lambda m: "\n" if m.group(1) == "n" else m.group(1), inner, ) heredoc = f"< AttributeRule: return AttributeRule(children) def _deserialize_list(self, value: List) -> TupleRule: - children = [] + children: List[Any] = [] for element in value: deserialized = self._deserialize(element) if not isinstance(deserialized, ExprTermRule): @@ -300,7 +309,7 @@ def _deserialize_list(self, value: List) -> TupleRule: return TupleRule([LSQB(), *children, RSQB()]) def _deserialize_object(self, value: dict) -> ObjectRule: - children = [] + children: List[Any] = [] for key, val in value.items(): children.append(self._deserialize_object_elem(key, val)) @@ -320,11 +329,11 @@ def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: ) elif isinstance(key, str) and "." 
in key: parts = key.split(".") - children = [] + dot_children: List[Any] = [] for part in parts: - children.append(self._deserialize_identifier(part)) - children.append(DOT()) - key = ObjectElemKeyDotAccessor(children[:-1]) # without the last comma + dot_children.append(self._deserialize_identifier(part)) + dot_children.append(DOT()) + key = ObjectElemKeyDotAccessor(dot_children[:-1]) # without the last dot else: key = self._deserialize_text(key) @@ -364,6 +373,8 @@ def _contains_block_marker(self, obj: dict) -> bool: return True if isinstance(value, list): for element in value: - if isinstance(element, dict) and self._contains_block_marker(element): + if isinstance(element, dict) and self._contains_block_marker( + element + ): return True return False diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 23302187..8b691c44 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -1,6 +1,7 @@ +"""Format LarkElement trees with indentation, alignment, and spacing.""" from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import List +from typing import List, Optional from hcl2.rules.abstract import LarkElement from hcl2.rules.base import ( @@ -10,7 +11,7 @@ BodyRule, ) from hcl2.rules.containers import ObjectRule, ObjectElemRule, TupleRule -from hcl2.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rules.expressions import ExprTermRule from hcl2.rules.for_expressions import ( ForTupleExprRule, ForObjectExprRule, @@ -23,6 +24,8 @@ @dataclass class FormatterOptions: + """Options controlling whitespace formatting of LarkElement trees.""" + indent_length: int = 2 open_empty_blocks: bool = True open_empty_objects: bool = True @@ -33,27 +36,35 @@ class FormatterOptions: class LarkElementTreeFormatter(ABC): - def __init__(self, options: FormatterOptions = None): + """Abstract base for formatters that operate on LarkElement trees.""" + + def __init__(self, options: Optional[FormatterOptions] = None): self.options = options or 
FormatterOptions() @abstractmethod def format_tree(self, tree: LarkElement): + """Apply formatting to the given LarkElement tree in place.""" raise NotImplementedError() class BaseFormatter(LarkElementTreeFormatter): - def __init__(self, options: FormatterOptions = None): + """Default formatter: adds indentation, newlines, and vertical alignment.""" + + def __init__(self, options: Optional[FormatterOptions] = None): super().__init__(options) - self._last_new_line: NewLineOrCommentRule = None + self._last_new_line: Optional[NewLineOrCommentRule] = None def format_tree(self, tree: LarkElement): + """Apply formatting to the given LarkElement tree in place.""" if isinstance(tree, StartRule): self.format_start_rule(tree) def format_start_rule(self, rule: StartRule): + """Format the top-level start rule.""" self.format_body_rule(rule.body, 0) def format_block_rule(self, rule: BlockRule, indent_level: int = 0): + """Format a block rule with its body and closing brace.""" if self.options.vertically_align_attributes: self._vertically_align_attributes_in_body(rule.body) @@ -64,7 +75,7 @@ def format_block_rule(self, rule: BlockRule, indent_level: int = 0): rule.children.insert(-1, self._build_newline(indent_level - 1, 2)) def format_body_rule(self, rule: BodyRule, indent_level: int = 0): - + """Format a body rule, adding newlines between attributes and blocks.""" in_start = isinstance(rule.parent, StartRule) new_children = [] @@ -90,9 +101,11 @@ def format_body_rule(self, rule: BodyRule, indent_level: int = 0): rule._children = new_children def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): + """Format an attribute rule by formatting its value expression.""" self.format_expression(rule.expression, indent_level + 1) def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): + """Format a tuple rule with one element per line.""" if len(rule.elements) == 0: if self.options.open_empty_tuples: rule.children.insert(1, 
self._build_newline(indent_level - 1, 2)) @@ -104,31 +117,31 @@ def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): if isinstance(child, ExprTermRule): self.format_expression(child, indent_level + 1) - if isinstance(child, (COMMA, LSQB)): + if isinstance(child, (COMMA, LSQB)): # type: ignore[misc] new_children.append(self._build_newline(indent_level)) self._deindent_last_line() rule._children = new_children def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): + """Format an object rule with one element per line and optional alignment.""" if len(rule.elements) == 0: if self.options.open_empty_objects: rule.children.insert(1, self._build_newline(indent_level - 1, 2)) return new_children = [] - for i in range(len(rule.children)): - child = rule.children[i] + for i, child in enumerate(rule.children): next_child = rule.children[i + 1] if i + 1 < len(rule.children) else None new_children.append(child) - if isinstance(child, LBRACE): + if isinstance(child, LBRACE): # type: ignore[misc] new_children.append(self._build_newline(indent_level)) if ( next_child and isinstance(next_child, ObjectElemRule) - and isinstance(child, (ObjectElemRule, COMMA)) + and isinstance(child, (ObjectElemRule, COMMA)) # type: ignore[misc] ): new_children.append(self._build_newline(indent_level)) @@ -144,6 +157,7 @@ def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): self._vertically_align_object_elems(rule) def format_expression(self, rule: ExprTermRule, indent_level: int = 0): + """Dispatch formatting for the inner expression of an ExprTermRule.""" if isinstance(rule.expression, ObjectRule): self.format_object_rule(rule.expression, indent_level) @@ -160,6 +174,7 @@ def format_expression(self, rule: ExprTermRule, indent_level: int = 0): self.format_expression(rule.expression, indent_level) def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = 0): + """Format a for-tuple expression with newlines around clauses.""" for 
child in expression.children: if isinstance(child, ExprTermRule): self.format_expression(child, indent_level + 1) @@ -182,6 +197,7 @@ def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = def format_forobjectexpr( self, expression: ForObjectExprRule, indent_level: int = 0 ): + """Format a for-object expression with newlines around clauses.""" for child in expression.children: if isinstance(child, ExprTermRule): self.format_expression(child, indent_level + 1) @@ -220,8 +236,7 @@ def _vertically_align_attributes_in_body(self, body: BodyRule): def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): max_length = max( - len(attribute.identifier.token.value) - for attribute in attributes_sequence + len(attribute.identifier.token.value) for attribute in attributes_sequence ) for attribute in attributes_sequence: name_length = len(attribute.identifier.token.value) @@ -238,7 +253,7 @@ def _vertically_align_object_elems(self, rule: ObjectRule): spaces_to_add = max_length - key_length separator = elem.children[1] - if isinstance(separator, COLON): + if isinstance(separator, COLON): # type: ignore[misc] spaces_to_add += 1 elem.children[1].set_value(" " * spaces_to_add + separator.value) @@ -257,7 +272,8 @@ def _build_newline( return result def _deindent_last_line(self, times: int = 1): + assert self._last_new_line is not None token = self._last_new_line.token - for i in range(times): + for _ in range(times): if token.value.endswith(" " * self.options.indent_length): token.set_value(token.value[: -self.options.indent_length]) diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index abfc21f6..b9f3b3ce 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -1,4 +1,5 @@ -from typing import List, Union +"""Reconstruct HCL2 text from a Lark Tree AST.""" +from typing import List, Optional, Union from lark import Tree, Token from hcl2.rules import tokens @@ -33,10 +34,16 @@ class HCLReconstructor: } def 
__init__(self): - self._reset_state() + self._last_was_space = True + self._current_indent = 0 + self._last_token_name: Optional[str] = None + self._last_rule_name: Optional[str] = None + self._in_parentheses = False + self._in_object = False + self._in_tuple = False def _reset_state(self): - """State tracking for formatting decisions""" + """Reset state tracking for formatting decisions.""" self._last_was_space = True self._current_indent = 0 self._last_token_name = None @@ -45,8 +52,9 @@ def _reset_state(self): self._in_object = False self._in_tuple = False + # pylint:disable=R0911,R0912 def _should_add_space_before( - self, current_node: Union[Tree, Token], parent_rule_name: str = None + self, current_node: Union[Tree, Token], parent_rule_name: Optional[str] = None ) -> bool: """Determine if we should add a space before the current token/rule.""" @@ -151,7 +159,9 @@ def _should_add_space_before( return False - def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: + def _reconstruct_tree( + self, tree: Tree, parent_rule_name: Optional[str] = None + ) -> List[str]: """Recursively reconstruct a Tree node into HCL text fragments.""" result = [] rule_name = tree.data @@ -197,7 +207,9 @@ def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[st return result - def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: + def _reconstruct_token( + self, token: Token, parent_rule_name: Optional[str] = None + ) -> str: """Reconstruct a Token node into HCL text fragments.""" result = str(token.value) if self._should_add_space_before(token, parent_rule_name): @@ -210,18 +222,17 @@ def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: return result def _reconstruct_node( - self, node: Union[Tree, Token], parent_rule_name: str = None + self, node: Union[Tree, Token], parent_rule_name: Optional[str] = None ) -> List[str]: """Reconstruct any node (Tree or Token) into HCL text 
fragments.""" if isinstance(node, Tree): return self._reconstruct_tree(node, parent_rule_name) - elif isinstance(node, Token): + if isinstance(node, Token): return [self._reconstruct_token(node, parent_rule_name)] - else: - # Fallback: convert to string - return [str(node)] + # Fallback: convert to string + return [str(node)] - def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: + def reconstruct(self, tree: Tree, postproc=None) -> str: """Convert a Lark.Tree AST back into a string representation of HCL.""" # Reset state self._reset_state() diff --git a/hcl2/rules/abstract.py b/hcl2/rules/abstract.py index 26cda89c..554bc44d 100644 --- a/hcl2/rules/abstract.py +++ b/hcl2/rules/abstract.py @@ -46,7 +46,7 @@ def serialize( class LarkToken(LarkElement, ABC): """Base class for terminal token elements (leaves of the tree).""" - def __init__(self, value: Union[str, int, float]): + def __init__(self, value: Optional[Union[str, int, float]] = None): self._value = value super().__init__() @@ -100,7 +100,7 @@ def serialize( raise NotImplementedError() @property - def children(self) -> List[LarkElement]: + def children(self) -> List[Any]: """Return the list of child elements.""" return self._children @@ -125,9 +125,9 @@ def to_lark(self) -> Tree: return Tree(self.lark_name(), result_children, meta=self._meta) - def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): + def __init__(self, children: List[Any], meta: Optional[Meta] = None): super().__init__() - self._children = children + self._children: List[Any] = children self._meta = meta or Meta() for index, child in enumerate(children): diff --git a/hcl2/rules/tokens.py b/hcl2/rules/tokens.py index e648bc14..dab5ac4f 100644 --- a/hcl2/rules/tokens.py +++ b/hcl2/rules/tokens.py @@ -1,7 +1,7 @@ """Token classes for terminal elements in the LarkElement tree.""" from functools import lru_cache -from typing import Callable, Any, Dict, Type, Optional, Tuple, Union +from typing 
import Callable, Any, Dict, Type, Optional, Tuple from hcl2.rules.abstract import LarkToken @@ -12,6 +12,13 @@ class StringToken(LarkToken): cached subclass whose static `lark_name()` yields the given string. """ + @staticmethod + def lark_name() -> str: + """Overridden by dynamic subclasses created via ``__class_getitem__``.""" + raise NotImplementedError( + "Use StringToken['NAME'] to create a concrete subclass" + ) + @classmethod @lru_cache(maxsize=None) def __build_subclass(cls, name: str) -> Type["StringToken"]: @@ -31,9 +38,6 @@ def __class_getitem__(cls, name: str) -> Type["StringToken"]: raise TypeError("StringToken[...] expects a single str argument") return cls.__build_subclass(name) - def __init__(self, value: Optional[Union[str, int, float]] = None): - super().__init__(value) # type: ignore[arg-type] - @property def serialize_conversion(self) -> Callable[[Any], str]: """Return str as the conversion callable.""" diff --git a/hcl2/rules/whitespace.py b/hcl2/rules/whitespace.py index 540845d7..8591fd20 100644 --- a/hcl2/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -61,7 +61,9 @@ class InlineCommentMixIn(LarkRule, ABC): def _insert_optionals(self, children: List, indexes: Optional[List[int]] = None): """Insert None placeholders at expected optional-child positions.""" - for index in indexes: # type: ignore[union-attr] + if indexes is None: + return + for index in indexes: try: child = children[index] except IndexError: diff --git a/hcl2/transformer.py b/hcl2/transformer.py index 7de4f7e1..aebf9d1f 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -1,3 +1,4 @@ +"""Transform Lark parse trees into typed LarkElement rule trees.""" # pylint: disable=missing-function-docstring,unused-argument from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta @@ -81,16 +82,19 @@ def __init__(self, discard_new_line_or_comments: bool = False): def __default_token__(self, token: Token) -> StringToken: # TODO make this return 
StaticStringToken where applicable - if token.value in StaticStringToken.classes_by_value.keys(): + if token.value in StaticStringToken.classes_by_value: return StaticStringToken.classes_by_value[token.value]() - return StringToken[token.type](token.value) + return StringToken[token.type](token.value) # type: ignore[misc] + # pylint: disable=C0103 def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: return FloatLiteral(token.value) + # pylint: disable=C0103 def NAME(self, token: Token) -> NAME: return NAME(token.value) + # pylint: disable=C0103 def INT_LITERAL(self, token: Token) -> IntLiteral: return IntLiteral(token.value) @@ -114,7 +118,9 @@ def attribute(self, meta: Meta, args) -> AttributeRule: return AttributeRule(args, meta) @v_args(meta=True) - def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + def new_line_or_comment( + self, meta: Meta, args + ): # -> NewLineOrCommentRule | Discard if self.discard_new_line_or_comments: return Discard return NewLineOrCommentRule(args, meta) diff --git a/hcl2/utils.py b/hcl2/utils.py index b15dda8a..4eb31470 100644 --- a/hcl2/utils.py +++ b/hcl2/utils.py @@ -1,3 +1,4 @@ +"""Serialization options, context tracking, and string utility helpers.""" import re from contextlib import contextmanager from dataclasses import dataclass, replace @@ -9,6 +10,8 @@ @dataclass class SerializationOptions: + """Options controlling how LarkElement trees are serialized to Python dicts.""" + with_comments: bool = True with_meta: bool = False wrap_objects: bool = False @@ -21,10 +24,13 @@ class SerializationOptions: @dataclass class SerializationContext: + """Mutable state tracked during serialization traversal.""" + inside_dollar_string: bool = False inside_parentheses: bool = False def replace(self, **kwargs) -> "SerializationContext": + """Return a new context with the given fields overridden.""" return replace(self, **kwargs) @contextmanager @@ -35,6 +41,7 @@ def copy(self, **kwargs) -> 
Generator["SerializationContext", None, None]: @contextmanager def modify(self, **kwargs): + """Context manager that temporarily mutates fields, restoring on exit.""" original_values = {key: getattr(self, key) for key in kwargs} for key, value in kwargs.items(): @@ -49,24 +56,28 @@ def modify(self, **kwargs): def is_dollar_string(value: str) -> bool: + """Return True if value is a ${...} interpolation wrapper.""" if not isinstance(value, str): return False return value.startswith("${") and value.endswith("}") def to_dollar_string(value: str) -> str: + """Wrap value in ${...} if not already wrapped.""" if not is_dollar_string(value): return f"${{{value}}}" return value def unwrap_dollar_string(value: str) -> str: + """Strip the ${...} wrapper from value if present.""" if is_dollar_string(value): return value[2:-1] return value def wrap_into_parentheses(value: str) -> str: + """Wrap value in parentheses, preserving ${...} wrappers.""" if is_dollar_string(value): value = unwrap_dollar_string(value) return to_dollar_string(f"({value})") diff --git a/pylintrc b/pylintrc index edd28005..34599008 100644 --- a/pylintrc +++ b/pylintrc @@ -9,7 +9,7 @@ # Add to the black list. It should be a base name, not a # path. You may set this option multiple times. -ignore=CVS +ignore=CVS,version.py # Pickle collected data for later comparisons. persistent=yes diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index e2a0843b..99537532 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -37,7 +37,7 @@ block label1 label2 { k = a.b.5 l = a.*.b m = a[*][c].a.*.1 - + block b1 { a = 1 } diff --git a/test/integration/test_round_trip.py b/test/integration/test_round_trip.py index a963c4f8..67217f07 100644 --- a/test/integration/test_round_trip.py +++ b/test/integration/test_round_trip.py @@ -9,6 +9,7 @@ 3. JSON → HCL reconstruction (serialize + deserialize + format + reconstruct) 4. 
Full round-trip (HCL → JSON → HCL → JSON produces identical JSON) """ +# pylint: disable=C0103,C0114,C0115,C0116 import json from enum import Enum @@ -120,7 +121,10 @@ def test_hcl_to_json(self): class TestRoundTripReserialization(TestCase): - """Test JSON → JSON reserialization: parse HCL, serialize, deserialize, reserialize, compare with expected.""" + """Test JSON → JSON reserialization. + + Parse HCL, serialize, deserialize, reserialize, compare with expected. + """ maxDiff = None @@ -128,7 +132,9 @@ def test_json_reserialization(self): for suite in _get_suites(): with self.subTest(suite=suite): hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) + json_reserialized_path = _get_suite_file( + suite, SuiteStep.JSON_RESERIALIZED + ) serialized = _parse_and_serialize(hcl_path.read_text()) actual = _deserialize_and_reserialize(serialized) @@ -142,7 +148,10 @@ def test_json_reserialization(self): class TestRoundTripReconstruction(TestCase): - """Test JSON → HCL reconstruction: parse HCL, serialize, deserialize, format, reconstruct, compare with expected HCL.""" + """Test JSON → HCL reconstruction. + + Parse HCL, serialize, deserialize, format, reconstruct, compare with expected HCL. + """ maxDiff = None diff --git a/test/integration/test_specialized.py b/test/integration/test_specialized.py index 6fc175ef..1415f307 100644 --- a/test/integration/test_specialized.py +++ b/test/integration/test_specialized.py @@ -4,22 +4,24 @@ (operator precedence, Builder round-trip) with dedicated golden files in test/integration/special/. 
""" +# pylint: disable=C0103,C0114,C0115,C0116 import json from pathlib import Path +from typing import Optional from unittest import TestCase -from hcl2.deserializer import BaseDeserializer, DeserializerOptions -from hcl2.formatter import BaseFormatter -from hcl2.reconstructor import HCLReconstructor -from hcl2.utils import SerializationOptions - from test.integration.test_round_trip import ( _parse_and_serialize, _deserialize_and_reserialize, _deserialize_and_reconstruct, ) +from hcl2.deserializer import BaseDeserializer, DeserializerOptions +from hcl2.formatter import BaseFormatter +from hcl2.reconstructor import HCLReconstructor +from hcl2.utils import SerializationOptions + SPECIAL_DIR = Path(__file__).absolute().parent / "specialized" @@ -82,7 +84,7 @@ def test_builder_reserialization(self): def _deserialize_and_reconstruct_with_options( serialized: dict, - deserializer_options: DeserializerOptions = None, + deserializer_options: Optional[DeserializerOptions] = None, ) -> str: """Deserialize a Python dict and reconstruct HCL text with custom options.""" deserializer = BaseDeserializer(deserializer_options) diff --git a/test/unit/cli/test_hcl_to_json.py b/test/unit/cli/test_hcl_to_json.py index 67c8c48f..0b40c896 100644 --- a/test/unit/cli/test_hcl_to_json.py +++ b/test/unit/cli/test_hcl_to_json.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 import json import os import tempfile @@ -8,7 +9,7 @@ from cli.hcl_to_json import main -SIMPLE_HCL = 'x = 1\n' +SIMPLE_HCL = "x = 1\n" SIMPLE_JSON_DICT = {"x": 1} @@ -23,7 +24,6 @@ def _read_file(path): class TestHclToJson(TestCase): - def test_single_file_to_stdout(self): with tempfile.TemporaryDirectory() as tmpdir: hcl_path = os.path.join(tmpdir, "test.tf") @@ -94,7 +94,7 @@ def test_with_meta_flag(self): self.assertIn("resource", result) def test_no_comments_flag(self): - hcl_with_comment = '# a comment\nx = 1\n' + hcl_with_comment = "# a comment\nx = 1\n" with tempfile.TemporaryDirectory() as tmpdir: 
hcl_path = os.path.join(tmpdir, "test.tf") _write_file(hcl_path, hcl_with_comment) @@ -108,7 +108,7 @@ def test_no_comments_flag(self): self.assertIn("comment", output) def test_wrap_objects_flag(self): - hcl_input = 'x = {\n a = 1\n}\n' + hcl_input = "x = {\n a = 1\n}\n" with tempfile.TemporaryDirectory() as tmpdir: hcl_path = os.path.join(tmpdir, "test.tf") _write_file(hcl_path, hcl_input) @@ -127,7 +127,7 @@ def test_wrap_objects_flag(self): self.assertNotEqual(default["x"], wrapped["x"]) def test_wrap_tuples_flag(self): - hcl_input = 'x = [1, 2]\n' + hcl_input = "x = [1, 2]\n" with tempfile.TemporaryDirectory() as tmpdir: hcl_path = os.path.join(tmpdir, "test.tf") _write_file(hcl_path, hcl_input) @@ -176,7 +176,6 @@ def test_invalid_path_raises_error(self): class TestSingleFileErrorHandling(TestCase): - def test_skip_error_with_output_file(self): with tempfile.TemporaryDirectory() as tmpdir: in_path = os.path.join(tmpdir, "test.tf") @@ -224,7 +223,6 @@ def test_raise_error_to_stdout(self): class TestDirectoryEdgeCases(TestCase): - def test_subdirectory_creation(self): with tempfile.TemporaryDirectory() as tmpdir: in_dir = os.path.join(tmpdir, "input") @@ -237,9 +235,7 @@ def test_subdirectory_creation(self): with patch("sys.argv", ["hcl2tojson", in_dir, out_dir]): main() - self.assertTrue( - os.path.exists(os.path.join(out_dir, "sub", "nested.json")) - ) + self.assertTrue(os.path.exists(os.path.join(out_dir, "sub", "nested.json"))) def test_directory_raise_error_without_skip(self): with tempfile.TemporaryDirectory() as tmpdir: diff --git a/test/unit/cli/test_helpers.py b/test/unit/cli/test_helpers.py index 6859d0ab..ee07ac96 100644 --- a/test/unit/cli/test_helpers.py +++ b/test/unit/cli/test_helpers.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 import os import tempfile from io import StringIO @@ -13,7 +14,6 @@ def _write_file(path, content): class TestConvertSingleFile(TestCase): - def test_does_not_close_stdout(self): """Regression test: 
stdout must not be closed after writing.""" with tempfile.TemporaryDirectory() as tmpdir: @@ -83,7 +83,6 @@ def convert(in_f, out_f): class TestConvertDirectory(TestCase): - def test_filters_by_extension(self): with tempfile.TemporaryDirectory() as tmpdir: in_dir = os.path.join(tmpdir, "input") @@ -100,8 +99,13 @@ def convert(in_f, out_f): converted_files.append(True) _convert_directory( - in_dir, out_dir, convert, False, (Exception,), - in_extensions={".tf"}, out_extension=".json", + in_dir, + out_dir, + convert, + False, + (Exception,), + in_extensions={".tf"}, + out_extension=".json", ) self.assertEqual(len(converted_files), 1) @@ -112,8 +116,13 @@ def test_requires_out_path(self): with tempfile.TemporaryDirectory() as tmpdir: with self.assertRaises(RuntimeError): _convert_directory( - tmpdir, None, lambda i, o: None, False, (Exception,), - in_extensions={".tf"}, out_extension=".json", + tmpdir, + None, + lambda i, o: None, + False, + (Exception,), + in_extensions={".tf"}, + out_extension=".json", ) def test_subdirectory_creation(self): @@ -129,13 +138,16 @@ def convert(in_f, out_f): out_f.write(in_f.read()) _convert_directory( - in_dir, out_dir, convert, False, (Exception,), - in_extensions={".tf"}, out_extension=".json", + in_dir, + out_dir, + convert, + False, + (Exception,), + in_extensions={".tf"}, + out_extension=".json", ) - self.assertTrue( - os.path.exists(os.path.join(out_dir, "sub", "nested.json")) - ) + self.assertTrue(os.path.exists(os.path.join(out_dir, "sub", "nested.json"))) def test_raise_error_without_skip(self): with tempfile.TemporaryDirectory() as tmpdir: @@ -150,13 +162,17 @@ def convert(in_f, out_f): with self.assertRaises(ValueError): _convert_directory( - in_dir, out_dir, convert, False, (ValueError,), - in_extensions={".tf"}, out_extension=".json", + in_dir, + out_dir, + convert, + False, + (ValueError,), + in_extensions={".tf"}, + out_extension=".json", ) class TestConvertStdin(TestCase): - def test_stdin_forward(self): stdout = 
StringIO() captured = [] @@ -166,8 +182,7 @@ def convert(in_f, out_f): captured.append(data) out_f.write("output") - with patch("sys.stdin", StringIO("input")), \ - patch("sys.stdout", stdout): + with patch("sys.stdin", StringIO("input")), patch("sys.stdout", stdout): _convert_stdin(convert) self.assertEqual(captured[0], "input") diff --git a/test/unit/cli/test_json_to_hcl.py b/test/unit/cli/test_json_to_hcl.py index 469d9188..dc9a1454 100644 --- a/test/unit/cli/test_json_to_hcl.py +++ b/test/unit/cli/test_json_to_hcl.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 import json import os import tempfile @@ -11,19 +12,7 @@ SIMPLE_JSON_DICT = {"x": 1} SIMPLE_JSON = json.dumps(SIMPLE_JSON_DICT) -BLOCK_JSON_DICT = { - "resource": [ - { - "aws_instance": [ - { - "example": [ - {"ami": "abc-123"} - ] - } - ] - } - ] -} +BLOCK_JSON_DICT = {"resource": [{"aws_instance": [{"example": [{"ami": "abc-123"}]}]}]} BLOCK_JSON = json.dumps(BLOCK_JSON_DICT) @@ -38,7 +27,6 @@ def _read_file(path): class TestJsonToHcl(TestCase): - def test_single_file_to_stdout(self): with tempfile.TemporaryDirectory() as tmpdir: json_path = os.path.join(tmpdir, "test.json") diff --git a/test/unit/rules/test_abstract.py b/test/unit/rules/test_abstract.py index 8803effc..3699ec0e 100644 --- a/test/unit/rules/test_abstract.py +++ b/test/unit/rules/test_abstract.py @@ -1,9 +1,10 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from lark import Token, Tree from lark.tree import Meta -from hcl2.rules.abstract import LarkElement, LarkToken, LarkRule +from hcl2.rules.abstract import LarkToken, LarkRule from hcl2.utils import SerializationOptions, SerializationContext diff --git a/test/unit/rules/test_base.py b/test/unit/rules/test_base.py index cfb6d666..bcf240a8 100644 --- a/test/unit/rules/test_base.py +++ b/test/unit/rules/test_base.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.const import IS_BLOCK @@ -5,7 
+6,15 @@ from hcl2.rules.expressions import ExpressionRule, ExprTermRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule, StringPartRule -from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE, DBLQUOTE, STRING_CHARS, NL_OR_COMMENT +from hcl2.rules.tokens import ( + NAME, + EQ, + LBRACE, + RBRACE, + DBLQUOTE, + STRING_CHARS, + NL_OR_COMMENT, +) from hcl2.rules.whitespace import NewLineOrCommentRule from hcl2.utils import SerializationOptions, SerializationContext diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py index 5ae28df4..196125dd 100644 --- a/test/unit/rules/test_containers.py +++ b/test/unit/rules/test_containers.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.containers import ( diff --git a/test/unit/rules/test_expressions.py b/test/unit/rules/test_expressions.py index 16800ed0..974885b5 100644 --- a/test/unit/rules/test_expressions.py +++ b/test/unit/rules/test_expressions.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.abstract import LarkRule @@ -9,14 +10,13 @@ BinaryOpRule, UnaryOpRule, ) -from hcl2.rules.literal_rules import BinaryOperatorRule, IdentifierRule +from hcl2.rules.literal_rules import BinaryOperatorRule from hcl2.rules.tokens import ( LPAR, RPAR, QMARK, COLON, BINARY_OP, - NAME, StringToken, ) from hcl2.utils import SerializationOptions, SerializationContext @@ -67,8 +67,8 @@ def _make_binary_term(op_str, rhs_value): return BinaryTermRule([_make_binary_operator(op_str), _make_expr_term(rhs_value)]) -MINUS_TOKEN = StringToken["MINUS"] -NOT_TOKEN = StringToken["NOT"] +MINUS_TOKEN = StringToken["MINUS"] # type: ignore[type-arg,name-defined] +NOT_TOKEN = StringToken["NOT"] # type: ignore[type-arg,name-defined] # --- ExprTermRule tests --- diff --git a/test/unit/rules/test_for_expressions.py b/test/unit/rules/test_for_expressions.py index 
febec643..0691d81c 100644 --- a/test/unit/rules/test_for_expressions.py +++ b/test/unit/rules/test_for_expressions.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.expressions import ExpressionRule @@ -45,26 +46,30 @@ def _make_identifier(name): def _make_for_intro_single(iter_name, iterable_value): """Build ForIntroRule with a single iterator: for iter_name in iterable :""" - return ForIntroRule([ - FOR(), - _make_identifier(iter_name), - IN(), - StubExpression(iterable_value), - COLON(), - ]) + return ForIntroRule( + [ + FOR(), + _make_identifier(iter_name), + IN(), + StubExpression(iterable_value), + COLON(), + ] + ) def _make_for_intro_dual(iter1_name, iter2_name, iterable_value): """Build ForIntroRule with dual iterators: for iter1, iter2 in iterable :""" - return ForIntroRule([ - FOR(), - _make_identifier(iter1_name), - COMMA(), - _make_identifier(iter2_name), - IN(), - StubExpression(iterable_value), - COLON(), - ]) + return ForIntroRule( + [ + FOR(), + _make_identifier(iter1_name), + COMMA(), + _make_identifier(iter2_name), + IN(), + StubExpression(iterable_value), + COLON(), + ] + ) def _make_for_cond(value): @@ -87,7 +92,9 @@ def test_first_iterator_single(self): def test_first_iterator_dual(self): i1 = _make_identifier("k") i2 = _make_identifier("v") - rule = ForIntroRule([FOR(), i1, COMMA(), i2, IN(), StubExpression("items"), COLON()]) + rule = ForIntroRule( + [FOR(), i1, COMMA(), i2, IN(), StubExpression("items"), COLON()] + ) self.assertIs(rule.first_iterator, i1) def test_second_iterator_none_when_single(self): @@ -96,15 +103,17 @@ def test_second_iterator_none_when_single(self): def test_second_iterator_present_when_dual(self): i2 = _make_identifier("v") - rule = ForIntroRule([ - FOR(), - _make_identifier("k"), - COMMA(), - i2, - IN(), - StubExpression("items"), - COLON(), - ]) + rule = ForIntroRule( + [ + FOR(), + _make_identifier("k"), + COMMA(), + i2, + IN(), + StubExpression("items"), + 
COLON(), + ] + ) self.assertIs(rule.second_iterator, i2) def test_iterable_property(self): @@ -160,61 +169,73 @@ def test_for_intro_property(self): def test_value_expr_property(self): value_expr = StubExpression("expr") - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - value_expr, - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + value_expr, + RSQB(), + ] + ) self.assertIs(rule.value_expr, value_expr) def test_condition_none(self): - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - StubExpression("expr"), - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ] + ) self.assertIsNone(rule.condition) def test_condition_present(self): cond = _make_for_cond("cond") - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - StubExpression("expr"), - cond, - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + cond, + RSQB(), + ] + ) self.assertIsInstance(rule.condition, ForCondRule) self.assertIs(rule.condition, cond) def test_serialize_without_condition(self): - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - StubExpression("expr"), - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ] + ) self.assertEqual(rule.serialize(), "${[for v in items : expr]}") def test_serialize_with_condition(self): - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - StubExpression("expr"), - _make_for_cond("cond"), - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + _make_for_cond("cond"), + RSQB(), + ] + ) self.assertEqual(rule.serialize(), "${[for v in items : expr if cond]}") def 
test_serialize_inside_dollar_string(self): - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - StubExpression("expr"), - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ] + ) ctx = SerializationContext(inside_dollar_string=True) self.assertEqual(rule.serialize(context=ctx), "[for v in items : expr]") @@ -228,124 +249,144 @@ def test_lark_name(self): def test_for_intro_property(self): intro = _make_for_intro_dual("k", "v", "items") - rule = ForObjectExprRule([ - LBRACE(), - intro, - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + intro, + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ] + ) self.assertIs(rule.for_intro, intro) def test_key_expr_property(self): key_expr = StubExpression("key") - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - key_expr, - FOR_OBJECT_ARROW(), - StubExpression("value"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + key_expr, + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ] + ) self.assertIs(rule.key_expr, key_expr) def test_value_expr_property(self): value_expr = StubExpression("value") - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - value_expr, - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + value_expr, + RBRACE(), + ] + ) self.assertIs(rule.value_expr, value_expr) def test_ellipsis_none(self): - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + 
LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ] + ) self.assertIsNone(rule.ellipsis) def test_ellipsis_present(self): ellipsis = ELLIPSIS() - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - ellipsis, - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + ellipsis, + RBRACE(), + ] + ) self.assertIs(rule.ellipsis, ellipsis) def test_condition_none(self): - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ] + ) self.assertIsNone(rule.condition) def test_condition_present(self): cond = _make_for_cond("cond") - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - cond, - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + cond, + RBRACE(), + ] + ) self.assertIsInstance(rule.condition, ForCondRule) self.assertIs(rule.condition, cond) def test_serialize_basic(self): - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ] + ) 
self.assertEqual(rule.serialize(), "${{for k, v in items : key => value}}") def test_serialize_with_ellipsis(self): - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - ELLIPSIS(), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + ELLIPSIS(), + RBRACE(), + ] + ) result = rule.serialize() self.assertIn("...", result) self.assertEqual(result, "${{for k, v in items : key => value...}}") def test_serialize_with_condition(self): - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - _make_for_cond("cond"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + _make_for_cond("cond"), + RBRACE(), + ] + ) result = rule.serialize() self.assertIn("if cond", result) self.assertEqual(result, "${{for k, v in items : key => value if cond}}") diff --git a/test/unit/rules/test_functions.py b/test/unit/rules/test_functions.py index 4c4b336e..18a763bd 100644 --- a/test/unit/rules/test_functions.py +++ b/test/unit/rules/test_functions.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.expressions import ExpressionRule diff --git a/test/unit/rules/test_literal_rules.py b/test/unit/rules/test_literal_rules.py index f8513c21..9a834e14 100644 --- a/test/unit/rules/test_literal_rules.py +++ b/test/unit/rules/test_literal_rules.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.literal_rules import ( diff --git a/test/unit/rules/test_strings.py b/test/unit/rules/test_strings.py index e7fb28f1..9abf7eb6 100644 --- 
a/test/unit/rules/test_strings.py +++ b/test/unit/rules/test_strings.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.expressions import ExpressionRule @@ -201,7 +202,7 @@ def test_serialize_no_preserve_escapes_quotes(self): self.assertEqual(result, '"say \\"hello\\""') def test_serialize_no_preserve_escapes_backslashes(self): - token = HEREDOC_TEMPLATE('< Date: Sat, 7 Mar 2026 21:09:52 +0100 Subject: [PATCH 36/45] update docs --- README.md | 58 +++++-- docs/usage.md | 306 +++++++++++++++++++++++++++++++++ tree-to-hcl2-reconstruction.md | 248 -------------------------- 3 files changed, 354 insertions(+), 258 deletions(-) create mode 100644 docs/usage.md delete mode 100644 tree-to-hcl2-reconstruction.md diff --git a/README.md b/README.md index 1ff75876..c93f1e07 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ [![Codacy Badge](https://app.codacy.com/project/badge/Grade/2e2015f9297346cbaa788c46ab957827)](https://app.codacy.com/gh/amplify-education/python-hcl2/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) -[![Build Status](https://travis-ci.org/amplify-education/python-hcl2.svg?branch=master)](https://travis-ci.org/amplify-education/python-hcl2) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/amplify-education/python-hcl2/master/LICENSE) [![PyPI](https://img.shields.io/pypi/v/python-hcl2.svg)](https://pypi.org/project/python-hcl2/) [![Python Versions](https://img.shields.io/pypi/pyversions/python-hcl2.svg)](https://pypi.python.org/pypi/python-hcl2) @@ -36,19 +35,58 @@ pip3 install python-hcl2 ### Usage +**HCL2 to Python dict:** + ```python import hcl2 -with open('foo.tf', 'r') as file: - dict = hcl2.load(file) + +with open("main.tf") as f: + data = hcl2.load(f) ``` -### Parse Tree to HCL2 reconstruction +**Python dict to HCL2:** + +```python +import hcl2 + +hcl_string = hcl2.dumps(data) + +with open("output.tf", "w") 
as f: + hcl2.dump(data, f) +``` -With version 6.x the possibility of HCL2 reconstruction from the Lark Parse Tree and Python dictionaries directly was introduced. +**Building HCL from scratch:** -Documentation and an example of manipulating Lark Parse Tree and reconstructing it back into valid HCL2 can be found in [tree-to-hcl2-reconstruction.md](https://github.com/amplify-education/python-hcl2/blob/main/tree-to-hcl2-reconstruction.md) file. +```python +import hcl2 + +doc = hcl2.Builder() +res = doc.block("resource", labels=["aws_instance", "web"], ami="abc-123", instance_type="t2.micro") +res.block("tags", Name="HelloWorld") + +hcl_string = hcl2.dumps(doc.build()) +``` + +For the full API reference, option dataclasses, intermediate pipeline stages, and more examples +see [docs/usage.md](https://github.com/amplify-education/python-hcl2/blob/main/docs/usage.md). + +### CLI Tools + +python-hcl2 ships two command-line converters: + +```sh +# HCL2 → JSON +hcl2tojson main.tf # prints JSON to stdout +hcl2tojson main.tf output.json # writes to file +hcl2tojson terraform/ output/ # converts a directory + +# JSON → HCL2 +jsontohcl2 output.json # prints HCL2 to stdout +jsontohcl2 output.json main.tf # writes to file +jsontohcl2 output/ terraform/ # converts a directory +``` -More details about reconstruction implementation can be found in PRs #169 and #177. +Both commands accept `-` as PATH to read from stdin. Run `hcl2tojson --help` or `jsontohcl2 --help` for the full list of flags. ## Building From Source @@ -61,7 +99,7 @@ Running `tox` will automatically execute linters as well as the unit tests. You can also run them individually with the `-e` argument. -For example, `tox -e py37-unit` will run the unit tests for python 3.7 +For example, `tox -e py310-unit` will run the unit tests for python 3.10 To see all the available options, run `tox -l`. @@ -81,9 +119,9 @@ You can reach us at We welcome pull requests! 
For your pull request to be accepted smoothly, we suggest that you: - For any sizable change, first open a GitHub issue to discuss your idea. -- Create a pull request. Explain why you want to make the change and what it’s for. +- Create a pull request. Explain why you want to make the change and what it's for. -We’ll try to answer any PR’s promptly. +We'll try to answer any PR's promptly. ## Limitations diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 00000000..f6a5f6d6 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,306 @@ +# python-hcl2 Usage Guide + +## Quick Reference + +| Function | Description | +|---|---| +| `hcl2.load(file)` | Parse an HCL2 file to a Python dict | +| `hcl2.loads(text)` | Parse an HCL2 string to a Python dict | +| `hcl2.dump(data, file)` | Write a Python dict as HCL2 to a file | +| `hcl2.dumps(data)` | Convert a Python dict to an HCL2 string | +| `hcl2.parse(file)` | Parse an HCL2 file to a LarkElement tree | +| `hcl2.parses(text)` | Parse an HCL2 string to a LarkElement tree | +| `hcl2.parse_to_tree(file)` | Parse an HCL2 file to a raw Lark tree | +| `hcl2.parses_to_tree(text)` | Parse an HCL2 string to a raw Lark tree | +| `hcl2.transform(lark_tree)` | Transform a raw Lark tree into a LarkElement tree | +| `hcl2.serialize(tree)` | Serialize a LarkElement tree to a Python dict | +| `hcl2.from_dict(data)` | Convert a Python dict into a LarkElement tree | +| `hcl2.from_json(text)` | Convert a JSON string into a LarkElement tree | +| `hcl2.reconstruct(tree)` | Convert a LarkElement tree (or Lark tree) to HCL2 text | +| `hcl2.Builder()` | Build HCL documents programmatically | + +## HCL to Python dict + +Use `load` / `loads` to parse HCL2 into a Python dictionary: + +```python +import hcl2 + +with open("main.tf") as f: + data = hcl2.load(f) + +# or from a string +data = hcl2.loads('resource "aws_instance" "web" { ami = "abc-123" }') +``` + +### SerializationOptions + +Pass `serialization_options` to control how the dict is 
produced: + +```python +from hcl2 import loads, SerializationOptions + +data = loads(text, serialization_options=SerializationOptions( + with_meta=True, + wrap_objects=True, +)) +``` + +| Field | Type | Default | Description | +|---|---|---|---| +| `with_comments` | `bool` | `True` | Include comments in the output | +| `with_meta` | `bool` | `False` | Add `__start_line__` / `__end_line__` metadata | +| `wrap_objects` | `bool` | `False` | Wrap object values as inline HCL2 strings | +| `wrap_tuples` | `bool` | `False` | Wrap tuple values as inline HCL2 strings | +| `explicit_blocks` | `bool` | `True` | Add `__is_block__: True` markers to blocks | +| `preserve_heredocs` | `bool` | `True` | Keep heredocs in their original form | +| `force_operation_parentheses` | `bool` | `False` | Force parentheses around all operations | +| `preserve_scientific_notation` | `bool` | `True` | Keep scientific notation as-is | + +## Python dict to HCL + +Use `dump` / `dumps` to convert a Python dictionary back into HCL2 text: + +```python +import hcl2 + +hcl_string = hcl2.dumps(data) + +with open("output.tf", "w") as f: + hcl2.dump(data, f) +``` + +### DeserializerOptions + +Control how the dict is interpreted when building the LarkElement tree: + +```python +from hcl2 import dumps, DeserializerOptions + +text = dumps(data, deserializer_options=DeserializerOptions( + object_elements_colon=True, +)) +``` + +| Field | Type | Default | Description | +|---|---|---|---| +| `heredocs_to_strings` | `bool` | `False` | Convert heredocs to plain strings | +| `strings_to_heredocs` | `bool` | `False` | Convert strings with `\n` to heredocs | +| `object_elements_colon` | `bool` | `False` | Use `:` instead of `=` in object elements | +| `object_elements_trailing_comma` | `bool` | `True` | Add trailing commas in object elements | + +### FormatterOptions + +Control whitespace and alignment in the generated HCL2: + +```python +from hcl2 import dumps, FormatterOptions + +text = dumps(data, 
formatter_options=FormatterOptions( + indent_length=4, + vertically_align_attributes=False, +)) +``` + +| Field | Type | Default | Description | +|---|---|---|---| +| `indent_length` | `int` | `2` | Number of spaces per indentation level | +| `open_empty_blocks` | `bool` | `True` | Expand empty blocks across multiple lines | +| `open_empty_objects` | `bool` | `True` | Expand empty objects across multiple lines | +| `open_empty_tuples` | `bool` | `False` | Expand empty tuples across multiple lines | +| `vertically_align_attributes` | `bool` | `True` | Vertically align `=` signs in attribute groups | +| `vertically_align_object_elements` | `bool` | `True` | Vertically align `=` signs in object elements | + +## Building HCL from scratch + +The `Builder` class produces dicts with the correct `__is_block__` markers so that `dumps` can distinguish blocks from plain objects: + +```python +import hcl2 + +doc = hcl2.Builder() +res = doc.block("resource", labels=["aws_instance", "web"], + ami="abc-123", instance_type="t2.micro") +res.block("tags", Name="HelloWorld") + +hcl_string = hcl2.dumps(doc.build()) +``` + +Output: + +```hcl +resource "aws_instance" "web" { + ami = "abc-123" + instance_type = "t2.micro" + + tags { + Name = "HelloWorld" + } +} +``` + +### Builder.block() + +```python +block( + block_type: str, + labels: Optional[List[str]] = None, + __nested_builder__: Optional[Builder] = None, + **attributes, +) -> Builder +``` + +Returns the child `Builder` for the new block, allowing chained calls. + +## Intermediate pipeline stages + +The full pipeline looks like this: + +``` +Forward: HCL2 Text → Lark Parse Tree → LarkElement Tree → Python Dict +Reverse: Python Dict → LarkElement Tree → HCL2 Text +``` + +You can access each stage individually for advanced use cases. 
+ +### parse / parses — HCL2 text to LarkElement tree + +```python +tree = hcl2.parses('x = 1') # StartRule +tree = hcl2.parse(open("main.tf")) # StartRule +``` + +Pass `discard_comments=True` to strip comments during transformation. + +### parse_to_tree / parses_to_tree — HCL2 text to raw Lark tree + +```python +lark_tree = hcl2.parses_to_tree('x = 1') # lark.Tree +``` + +### transform — raw Lark tree to LarkElement tree + +```python +lark_tree = hcl2.parses_to_tree('x = 1') +tree = hcl2.transform(lark_tree) # StartRule +``` + +### serialize — LarkElement tree to Python dict + +```python +tree = hcl2.parses('x = 1') +data = hcl2.serialize(tree) +# or with options: +from hcl2 import SerializationOptions +data = hcl2.serialize(tree, serialization_options=SerializationOptions(with_meta=True)) +``` + +### from_dict / from_json — Python dict or JSON to LarkElement tree + +```python +tree = hcl2.from_dict(data) # StartRule +tree = hcl2.from_json('{"x": 1}') # StartRule +``` + +Both accept optional `deserializer_options`, `formatter_options`, and `apply_format` (default `True`). + +### reconstruct — LarkElement tree (or Lark tree) to HCL2 text + +```python +tree = hcl2.from_dict(data) +text = hcl2.reconstruct(tree) +``` + +## CLI Tools + +### hcl2tojson + +Convert HCL2 files to JSON. 
+ +```sh +hcl2tojson main.tf # print JSON to stdout +hcl2tojson main.tf output.json # write to file +hcl2tojson terraform/ output/ # convert a directory +cat main.tf | hcl2tojson - # read from stdin +``` + +**Flags:** + +| Flag | Description | +|---|---| +| `-s` | Skip un-parsable files | +| `--json-indent N` | JSON indentation width (default: 2) | +| `--with-meta` | Add `__start_line__` / `__end_line__` metadata | +| `--with-comments` | Include comments in the output | +| `--wrap-objects` | Wrap object values as inline HCL2 | +| `--wrap-tuples` | Wrap tuple values as inline HCL2 | +| `--no-explicit-blocks` | Disable `__is_block__` markers | +| `--no-preserve-heredocs` | Convert heredocs to plain strings | +| `--force-parens` | Force parentheses around all operations | +| `--no-preserve-scientific` | Convert scientific notation to standard floats | +| `--version` | Show version and exit | + +### jsontohcl2 + +Convert JSON files to HCL2. + +```sh +jsontohcl2 output.json # print HCL2 to stdout +jsontohcl2 output.json main.tf # write to file +jsontohcl2 output/ terraform/ # convert a directory +cat output.json | jsontohcl2 - # read from stdin +``` + +**Flags:** + +| Flag | Description | +|---|---| +| `-s` | Skip un-parsable files | +| `--indent N` | Indentation width (default: 2) | +| `--colon-separator` | Use `:` instead of `=` in object elements | +| `--no-trailing-comma` | Omit trailing commas in object elements | +| `--heredocs-to-strings` | Convert heredocs to plain strings | +| `--strings-to-heredocs` | Convert strings with escaped newlines to heredocs | +| `--no-open-empty-blocks` | Collapse empty blocks to a single line | +| `--no-open-empty-objects` | Collapse empty objects to a single line | +| `--open-empty-tuples` | Expand empty tuples across multiple lines | +| `--no-align` | Disable vertical alignment of attributes and object elements | +| `--version` | Show version and exit | + +## Pipeline Diagram + +``` + Forward Pipeline + ================ + HCL2 
Text + │ + ▼ + ┌──────────────────┐ parse_to_tree / parses_to_tree + │ Lark Parse Tree │ + └────────┬─────────┘ + │ transform + ▼ + ┌──────────────────┐ + │ LarkElement Tree │ parse / parses (shortcut: HCL2 text → here) + └────────┬─────────┘ + │ serialize + ▼ + ┌──────────────────┐ + │ Python Dict │ load / loads (shortcut: HCL2 text → here) + └──────────────────┘ + + + Reverse Pipeline + ================ + Python Dict / JSON + │ + ▼ + ┌──────────────────┐ from_dict / from_json + │ LarkElement Tree │ + └────────┬─────────┘ + │ reconstruct + ▼ + ┌──────────────────┐ + │ HCL2 Text │ dump / dumps (shortcut: Python Dict / JSON → here) + └──────────────────┘ +``` diff --git a/tree-to-hcl2-reconstruction.md b/tree-to-hcl2-reconstruction.md deleted file mode 100644 index 1a5f83dc..00000000 --- a/tree-to-hcl2-reconstruction.md +++ /dev/null @@ -1,248 +0,0 @@ -# Writing HCL2 from Python - -Version 6 of this library supports reconstructing HCL files directly from -Python. This guide details how the reconstruction process takes place. 
See -also: [Limitations](#limitations) - -There are three major phases: - -- [Building a Python Dictionary](#building-a-python-dictionary) -- [Building an AST](#building-an-ast) -- [Reconstructing the file from the AST](#reconstructing-the-file-from-the-ast) - -## Example - -To create the `example.tf` file with the following content: - -```terraform -resource "aws_s3_bucket" "bucket" { - bucket = "bucket_id" - force_destroy = true -} -``` - -You can use the `hcl2.Builder` class like so: - -```python -import hcl2 - -example = hcl2.Builder() - -example.block( - "resource", - ["aws_s3_bucket", "bucket"], - bucket="bucket_id", - force_destroy=True, -) - -example_dict = example.build() -example_ast = hcl2.reverse_transform(example_dict) -example_file = hcl2.writes(example_ast) - -print(example_file) -# resource "aws_s3_bucket" "bucket" { -# bucket = "bucket_id" -# force_destroy = true -# } -# -``` - -This demonstrates a couple of different phases of the process worth mentioning. - -### Building a Python dictionary - -The `hcl2.Builder` class produces a dictionary that should be identical to the -output of `hcl2.load(example_file, with_meta=True)`. The `with_meta` keyword -argument is important here. HCL "blocks" in the Python dictionary are -identified by the presence of `__start_line__` and `__end_line__` metadata -within them. The `Builder` class handles adding that metadata. If that metadata -is missing, the `hcl2.reconstructor.HCLReverseTransformer` class fails to -identify what is a block and what is just an attribute with an object value. 
-Without that metadata, this dictionary: - -```python -{ - "resource": [ - { - "aws_s3_bucket": { - "bucket": { - "bucket": "bucket_id", - "force_destroy": True, - # "__start_line__": -1, - # "__end_line__": -1, - } - } - } - ] -} -``` - -Would produce this HCL output: - -```terraform -resource = [{ - aws_s3_bucket = { - bucket = { - bucket = "bucket_id" - force_destroy = true - } - } -}] -``` - -(This output parses to the same datastructure, but isn't formatted in blocks -as desired by the user. Therefore, using the `Builder` class is recommended.) - -### Building an AST - -The `hcl2.reconstructor.HCLReconstructor` class operates on an "abstract -syntax tree" (`hcl2.AST` or `Lark.Tree`, they're the same.) To produce this AST -from scratch in Python, use `hcl2.reverse_transform(hcl_dict)`, and to produce -this AST from an existing HCL file, use `hcl2.parse(hcl_file)`. - -You can also build these ASTs manually, if you want more control over the -generated HCL output. If you do this, though, make sure the AST you generate is -valid within the `hcl2.lark` grammar. - -Here's an example, which would add a "tags" element to that `example.tf` file -mentioned above. 
- -```python -from copy import deepcopy -from lark import Token, Tree -import hcl2 - - -def build_tags_tree(base_indent: int = 0) -> Tree: - # build Tree representing following HCL2 structure - # tags = { - # Name = "My bucket" - # Environment = "Dev" - # } - return Tree('attribute', [ - Tree('identifier', [ - Token('NAME', 'tags') - ]), - Token('EQ', '='), - Tree('expr_term', [ - Tree('object', [ - Tree('new_line_or_comment', [ - Token('NL_OR_COMMENT', '\n' + ' ' * (base_indent + 1)), - ]), - Tree('object_elem', [ - Tree('identifier', [ - Token('NAME', 'Name') - ]), - Token('EQ', '='), - Tree('expr_term', [ - Token('STRING_LIT', '"My bucket"') - ]) - ]), - Tree('new_line_and_or_comma', [ - Tree('new_line_or_comment', [ - Token('NL_OR_COMMENT', '\n' + ' ' * (base_indent + 1)), - ]), - ]), - Tree('object_elem', [ - Tree('identifier', [ - Token('NAME', 'Environment') - ]), - Token('EQ', '='), - Tree('expr_term', [ - Token('STRING_LIT', '"Dev"') - ]) - ]), - Tree('new_line_and_or_comma', [ - Tree('new_line_or_comment', [ - Token('NL_OR_COMMENT', '\n' + ' ' * base_indent), - ]), - ]), - ]), - ]) - ]) - - -def is_bucket_block(tree: Tree) -> bool: - # check whether given Tree represents `resource "aws_s3_bucket" "bucket"` - try: - return tree.data == 'block' and tree.children[2].value == '"bucket"' - except IndexError: - return False - - -def insert_tags(tree: Tree, indent: int = 0) -> Tree: - # Insert tags tree and adjust surrounding whitespaces to match indentation - new_children = [*tree.children.copy(), build_tags_tree(indent)] - # add indentation before tags tree - new_children[len(tree.children) - 1] = Tree('new_line_or_comment', [ - Token('NL_OR_COMMENT', '\n ') - ]) - # move closing bracket to the new line - new_children.append( - Tree('new_line_or_comment', [ - Token('NL_OR_COMMENT', '\n') - ]) - ) - return Tree(tree.data, new_children) - - -def process_token(node: Token, indent=0): - # Print details of this token and return its copy - print(f'[{indent}] 
(token)\t|', ' ' * indent, node.type, node.value) - return deepcopy(node) - - -def process_tree(node: Tree, depth=0) -> Tree: - # Recursively iterate over tree's children - # the depth parameter represents recursion depth, - # it's used to deduce indentation for printing tree and for adjusting whitespace after adding tags - new_children = [] - print(f'[{depth}] (tree)\t|', ' ' * depth, node.data) - for child in node.children: - if isinstance(child, Tree): - if is_bucket_block(child): - block_children = child.children.copy() - # this child is the Tree representing block's actual body - block_children[3] = insert_tags(block_children[3], depth) - # replace original Tree with new one including the modified body - child = Tree(child.data, block_children) - - new_children.append(process_tree(child, depth + 1)) - - else: - new_children.append(process_token(child, depth + 1)) - - return Tree(node.data, new_children) - - -def main(): - tree = hcl2.parse(open('example.tf')) - new_tree = process_tree(tree) - reconstructed = hcl2.writes(new_tree) - open('example_reconstructed.tf', 'w').write(reconstructed) - - -if __name__ == "__main__": - main() - -``` - -### Reconstructing the file from the AST - -Once the AST has been generated, you can convert it back to valid HCL using -`hcl2.writes(ast)`. In the above example, that conversion is done in the -`main()` function. - -## Limitations - -- Some formatting choices are impossible to specify via `hcl2.Builder()` and - require manual intervention of the AST produced after the `reverse_transform` - step. - -- Most notably, this means it's not possible to generate files containing - comments (both inline and block comments) - -- Even when parsing a file directly and writing it back out, some formatting - information may be lost due to Terminals discarded during the parsing process. - The reconstructed output should still parse to the same dictionary at the end - of the day though. 
From 4a65479fdc406a07892364e51bc270aa0f2c26af Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 22:11:03 +0100 Subject: [PATCH 37/45] fix the limitation of using expressions as object keys --- README.md | 13 +--- hcl2/deserializer.py | 18 +---- hcl2/hcl2.lark | 4 +- hcl2/rules/containers.py | 57 ++++----------- hcl2/transformer.py | 26 +++---- test/integration/hcl2_original/object_keys.tf | 3 + .../hcl2_reconstructed/object_keys.tf | 3 + test/integration/hcl2_reconstructed/smoke.tf | 8 +-- .../json_reserialized/object_keys.json | 5 +- test/integration/json_reserialized/smoke.json | 2 +- .../json_serialized/object_keys.json | 5 +- test/integration/json_serialized/smoke.json | 2 +- test/unit/rules/test_containers.py | 70 ++++--------------- test/unit/test_deserializer.py | 17 +++-- 14 files changed, 70 insertions(+), 163 deletions(-) diff --git a/README.md b/README.md index c93f1e07..c848ef0f 100644 --- a/README.md +++ b/README.md @@ -125,15 +125,4 @@ We'll try to answer any PR's promptly. ## Limitations -### Using inline expression as an object key - -- Object key can be an expression as long as it is wrapped in parentheses: - ```terraform - locals { - foo = "bar" - baz = { - (format("key_prefix_%s", local.foo)) : "value" - # format("key_prefix_%s", local.foo) : "value" this will fail - } - } - ``` +None that are known. 
diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 03dac5d8..ae6cbc15 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -22,7 +22,6 @@ ObjectRule, ObjectElemRule, ObjectElemKeyExpressionRule, - ObjectElemKeyDotAccessor, ObjectElemKeyRule, ) from hcl2.rules.expressions import ExprTermRule @@ -51,7 +50,6 @@ RSQB, LSQB, COMMA, - DOT, LBRACE, HEREDOC_TRIM_TEMPLATE, HEREDOC_TEMPLATE, @@ -320,20 +318,8 @@ def _deserialize_object(self, value: dict) -> ObjectRule: def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: if self._is_expression(key): - key = ObjectElemKeyExpressionRule( - [ - child - for child in self._deserialize_expression(key).children - if child is not None - ] - ) - elif isinstance(key, str) and "." in key: - parts = key.split(".") - dot_children: List[Any] = [] - for part in parts: - dot_children.append(self._deserialize_identifier(part)) - dot_children.append(DOT()) - key = ObjectElemKeyDotAccessor(dot_children[:-1]) # without the last dot + expr = self._deserialize_expression(key) + key = ObjectElemKeyExpressionRule([expr]) else: key = self._deserialize_text(key) diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index 4a9f1ec6..f0248290 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -177,9 +177,7 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? 
COMMA)) new_line_or_comment?)* RBRACE object_elem : object_elem_key ( EQ | COLON ) expression -object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression -object_elem_key_expression : LPAR expression RPAR -object_elem_key_dot_accessor : identifier (DOT identifier)+ +object_elem_key : expression // Heredocs heredoc_template : HEREDOC_TEMPLATE diff --git a/hcl2/rules/containers.py b/hcl2/rules/containers.py index 78e0bdeb..1ca5f69b 100644 --- a/hcl2/rules/containers.py +++ b/hcl2/rules/containers.py @@ -18,9 +18,6 @@ RBRACE, LSQB, RSQB, - LPAR, - RPAR, - DOT, ) from hcl2.rules.whitespace import ( NewLineOrCommentRule, @@ -114,65 +111,37 @@ def serialize( class ObjectElemKeyExpressionRule(LarkRule): - """Rule for parenthesized expression keys in objects.""" + """Rule for expression keys in objects (bare or parenthesized). - _children_layout: Tuple[ - LPAR, - ExpressionRule, - RPAR, - ] + Holds a single ExpressionRule child. Parenthesized keys like + ``(var.account)`` arrive as an ExprTermRule whose own ``serialize()`` + already emits the surrounding ``(…)``, so this class does not need + separate handling for bare vs parenthesized forms. 
+ """ + + _children_layout: Tuple[ExpressionRule] @staticmethod def lark_name() -> str: """Return the grammar rule name.""" - return "object_elem_key_expression" + return "object_elem_key" @property def expression(self) -> ExpressionRule: - """Return the parenthesized key expression.""" - return self._children[1] + """Return the key expression.""" + return self._children[0] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - """Serialize to '(expression)' string.""" + """Serialize to '${expression}' string.""" with context.modify(inside_dollar_string=True): - result = f"({self.expression.serialize(options, context)})" + result = str(self.expression.serialize(options, context)) if not context.inside_dollar_string: result = to_dollar_string(result) return result -class ObjectElemKeyDotAccessor(LarkRule): - """Rule for dot-accessor keys in objects (e.g. a.b.c).""" - - _children_layout: Tuple[ - IdentifierRule, - Tuple[ - IdentifierRule, - DOT, - ], - ] - - @staticmethod - def lark_name() -> str: - """Return the grammar rule name.""" - return "object_elem_key_dot_accessor" - - @property - def identifiers(self) -> List[IdentifierRule]: - """Return the chain of identifiers.""" - return [child for child in self._children if isinstance(child, IdentifierRule)] - - def serialize( - self, options=SerializationOptions(), context=SerializationContext() - ) -> Any: - """Serialize to 'a.b.c' string.""" - return ".".join( - identifier.serialize(options, context) for identifier in self.identifiers - ) - - class ObjectElemRule(LarkRule): """Rule for a single key = value element in an object.""" diff --git a/hcl2/transformer.py b/hcl2/transformer.py index aebf9d1f..d483cd90 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -15,7 +15,6 @@ ObjectElemKeyRule, TupleRule, ObjectElemKeyExpressionRule, - ObjectElemKeyDotAccessor, ) from hcl2.rules.expressions import ( BinaryTermRule, @@ -198,20 +197,17 @@ def object_elem(self, meta: 
Meta, args) -> ObjectElemRule: return ObjectElemRule(args, meta) @v_args(meta=True) - def object_elem_key(self, meta: Meta, args) -> ObjectElemKeyRule: - return ObjectElemKeyRule(args, meta) - - @v_args(meta=True) - def object_elem_key_expression( - self, meta: Meta, args - ) -> ObjectElemKeyExpressionRule: - return ObjectElemKeyExpressionRule(args, meta) - - @v_args(meta=True) - def object_elem_key_dot_accessor( - self, meta: Meta, args - ) -> ObjectElemKeyDotAccessor: - return ObjectElemKeyDotAccessor(args, meta) + def object_elem_key(self, meta: Meta, args): + expr = args[0] + # Simple literals (identifier, string, int, float) wrapped in ExprTermRule + if isinstance(expr, ExprTermRule) and len(expr.children) == 5: + inner = expr.children[2] # position 2 in [None, None, inner, None, None] + if isinstance( + inner, (IdentifierRule, StringRule, IntLitRule, FloatLitRule) + ): + return ObjectElemKeyRule([inner], meta) + # Any other expression (parenthesized or bare) + return ObjectElemKeyExpressionRule([expr], meta) @v_args(meta=True) def arguments(self, meta: Meta, args) -> ArgumentsRule: diff --git a/test/integration/hcl2_original/object_keys.tf b/test/integration/hcl2_original/object_keys.tf index 913d5a42..c3f33146 100644 --- a/test/integration/hcl2_original/object_keys.tf +++ b/test/integration/hcl2_original/object_keys.tf @@ -5,4 +5,7 @@ bar = { (var.account) : 3 (format("key_prefix_%s", local.foo)) : 4 "prefix_${var.account}:${var.user}_suffix": 5, + 1 + 1 = "two", + (2 + 2) = "four", + format("key_%s", var.name) = "dynamic" } diff --git a/test/integration/hcl2_reconstructed/object_keys.tf b/test/integration/hcl2_reconstructed/object_keys.tf index 497e65a6..6d20581c 100644 --- a/test/integration/hcl2_reconstructed/object_keys.tf +++ b/test/integration/hcl2_reconstructed/object_keys.tf @@ -5,4 +5,7 @@ bar = { (var.account) = 3, (format("key_prefix_%s", local.foo)) = 4, "prefix_${var.account}:${var.user}_suffix" = 5, + 1 + 1 = "two", + (2 + 2) = "four", + 
format("key_%s", var.name) = "dynamic", } diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 970cc1cf..c0358021 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -7,12 +7,12 @@ block label1 label2 { f = "${"this is a string"}" g = 1 == 2 h = { - k1 = 5, - k2 = 10, - "k3" = { + k1 = 5, + k2 = 10, + "k3" = { k4 = "a", }, - (5 + 5) = "d", + (5 + 5) = "d", k5.attr.attr = "e", } i = [ diff --git a/test/integration/json_reserialized/object_keys.json b/test/integration/json_reserialized/object_keys.json index 8acccdea..3146aa52 100644 --- a/test/integration/json_reserialized/object_keys.json +++ b/test/integration/json_reserialized/object_keys.json @@ -5,6 +5,9 @@ "baz": 2, "${(var.account)}": 3, "${(format(\"key_prefix_%s\", local.foo))}": 4, - "\"prefix_${var.account}:${var.user}_suffix\"": 5 + "\"prefix_${var.account}:${var.user}_suffix\"": 5, + "${1 + 1}": "\"two\"", + "${(2 + 2)}": "\"four\"", + "${format(\"key_%s\", var.name)}": "\"dynamic\"" } } diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index 5bcc702a..dbff114f 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -17,7 +17,7 @@ "k4": "\"a\"" }, "${(5 + 5)}": "\"d\"", - "k5.attr.attr": "\"e\"" + "${k5.attr.attr}": "\"e\"" }, "i": [ "a", diff --git a/test/integration/json_serialized/object_keys.json b/test/integration/json_serialized/object_keys.json index 8acccdea..3146aa52 100644 --- a/test/integration/json_serialized/object_keys.json +++ b/test/integration/json_serialized/object_keys.json @@ -5,6 +5,9 @@ "baz": 2, "${(var.account)}": 3, "${(format(\"key_prefix_%s\", local.foo))}": 4, - "\"prefix_${var.account}:${var.user}_suffix\"": 5 + "\"prefix_${var.account}:${var.user}_suffix\"": 5, + "${1 + 1}": "\"two\"", + "${(2 + 2)}": "\"four\"", + "${format(\"key_%s\", 
var.name)}": "\"dynamic\"" } } diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index 5bcc702a..dbff114f 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -17,7 +17,7 @@ "k4": "\"a\"" }, "${(5 + 5)}": "\"d\"", - "k5.attr.attr": "\"e\"" + "${k5.attr.attr}": "\"e\"" }, "i": [ "a", diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py index 196125dd..0231987d 100644 --- a/test/unit/rules/test_containers.py +++ b/test/unit/rules/test_containers.py @@ -5,7 +5,6 @@ TupleRule, ObjectElemKeyRule, ObjectElemKeyExpressionRule, - ObjectElemKeyDotAccessor, ObjectElemRule, ObjectRule, ) @@ -17,9 +16,6 @@ RSQB, LBRACE, RBRACE, - LPAR, - RPAR, - DOT, EQ, COLON, COMMA, @@ -188,70 +184,28 @@ def test_serialize_string(self): class TestObjectElemKeyExpressionRule(TestCase): def test_lark_name(self): - self.assertEqual( - ObjectElemKeyExpressionRule.lark_name(), "object_elem_key_expression" - ) + self.assertEqual(ObjectElemKeyExpressionRule.lark_name(), "object_elem_key") def test_expression_property(self): - expr = StubExpression("5 + 5") - rule = ObjectElemKeyExpressionRule([LPAR(), expr, RPAR()]) + expr = StubExpression("1 + 1") + rule = ObjectElemKeyExpressionRule([expr]) self.assertIs(rule.expression, expr) - def test_serialize(self): - rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + def test_serialize_bare(self): + rule = ObjectElemKeyExpressionRule([StubExpression("1 + 1")]) result = rule.serialize() - self.assertEqual(result, "${(5 + 5)}") + self.assertEqual(result, "${1 + 1}") def test_serialize_inside_dollar_string(self): - rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + rule = ObjectElemKeyExpressionRule([StubExpression("1 + 1")]) ctx = SerializationContext(inside_dollar_string=True) result = rule.serialize(context=ctx) - self.assertEqual(result, "(5 + 5)") - - -# --- 
ObjectElemKeyDotAccessor tests --- - - -class TestObjectElemKeyDotAccessor(TestCase): - def test_lark_name(self): - self.assertEqual( - ObjectElemKeyDotAccessor.lark_name(), "object_elem_key_dot_accessor" - ) + self.assertEqual(result, "1 + 1") - def test_identifiers_property(self): - i1 = _make_identifier("k5") - i2 = _make_identifier("attr") - i3 = _make_identifier("sub") - rule = ObjectElemKeyDotAccessor([i1, DOT(), i2, DOT(), i3]) - idents = rule.identifiers - self.assertEqual(len(idents), 3) - self.assertIs(idents[0], i1) - self.assertIs(idents[1], i2) - self.assertIs(idents[2], i3) - - def test_identifiers_two_segments(self): - i1 = _make_identifier("a") - i2 = _make_identifier("b") - rule = ObjectElemKeyDotAccessor([i1, DOT(), i2]) - self.assertEqual(len(rule.identifiers), 2) - - def test_serialize(self): - rule = ObjectElemKeyDotAccessor( - [ - _make_identifier("k5"), - DOT(), - _make_identifier("attr"), - DOT(), - _make_identifier("sub"), - ] - ) - self.assertEqual(rule.serialize(), "k5.attr.sub") - - def test_serialize_two_segments(self): - rule = ObjectElemKeyDotAccessor( - [_make_identifier("a"), DOT(), _make_identifier("b")] - ) - self.assertEqual(rule.serialize(), "a.b") + def test_serialize_function_call(self): + rule = ObjectElemKeyExpressionRule([StubExpression('format("k", v)')]) + result = rule.serialize() + self.assertEqual(result, '${format("k", v)}') # --- ObjectElemRule tests --- diff --git a/test/unit/test_deserializer.py b/test/unit/test_deserializer.py index 41ecd35f..8ae7db67 100644 --- a/test/unit/test_deserializer.py +++ b/test/unit/test_deserializer.py @@ -8,7 +8,6 @@ TupleRule, ObjectRule, ObjectElemRule, - ObjectElemKeyDotAccessor, ObjectElemKeyExpressionRule, ) from hcl2.rules.expressions import ExprTermRule @@ -364,15 +363,19 @@ def test_dotted_key_object_element(self): result = d._deserialize_object_elem("a.b", 1) self.assertIsInstance(result, ObjectElemRule) key_rule = result.key - self.assertIsInstance(key_rule.value, 
ObjectElemKeyDotAccessor) - identifiers = key_rule.value.identifiers - self.assertEqual(len(identifiers), 2) - self.assertEqual(identifiers[0].token.value, "a") - self.assertEqual(identifiers[1].token.value, "b") + self.assertIsInstance(key_rule.value, IdentifierRule) + self.assertEqual(key_rule.value.token.value, "a.b") def test_expression_key_object_element(self): d = _deser() - result = d._deserialize_object_elem("${var.key}", 1) + result = d._deserialize_object_elem("${(var.key)}", 1) + self.assertIsInstance(result, ObjectElemRule) + key_rule = result.key + self.assertIsInstance(key_rule.value, ObjectElemKeyExpressionRule) + + def test_bare_expression_key_object_element(self): + d = _deser() + result = d._deserialize_object_elem("${1 + 1}", 1) self.assertIsInstance(result, ObjectElemRule) key_rule = result.key self.assertIsInstance(key_rule.value, ObjectElemKeyExpressionRule) From a06600262d749d2900ef68edce8127e12284825d Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 22:34:00 +0100 Subject: [PATCH 38/45] `Formatter._vertically_align_object_elems` - fix alignment for expressions and keys --- hcl2/formatter.py | 27 ++++++++++++++++--- .../hcl2_reconstructed/object_keys.tf | 10 +++---- test/integration/hcl2_reconstructed/smoke.tf | 6 ++--- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 8b691c44..1b0702c9 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -10,7 +10,13 @@ AttributeRule, BodyRule, ) -from hcl2.rules.containers import ObjectRule, ObjectElemRule, TupleRule +from hcl2.rules.containers import ( + ObjectRule, + ObjectElemRule, + ObjectElemKeyRule, + ObjectElemKeyExpressionRule, + TupleRule, +) from hcl2.rules.expressions import ExprTermRule from hcl2.rules.for_expressions import ( ForTupleExprRule, @@ -246,9 +252,9 @@ def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): ) def _vertically_align_object_elems(self, rule: ObjectRule): - max_length 
= max(len(elem.key.serialize()) for elem in rule.elements) + max_length = max(self._key_text_width(elem.key) for elem in rule.elements) for elem in rule.elements: - key_length = len(elem.key.serialize()) + key_length = self._key_text_width(elem.key) spaces_to_add = max_length - key_length @@ -258,6 +264,21 @@ def _vertically_align_object_elems(self, rule: ObjectRule): elem.children[1].set_value(" " * spaces_to_add + separator.value) + @staticmethod + def _key_text_width(key: LarkElement) -> int: + """Compute the HCL text width of an object element key.""" + width = len(str(key.serialize())) + # Expression keys serialize with ${...} wrapping (+3 chars vs HCL text). + # Handle both direct ObjectElemKeyExpressionRule (from parser) and + # ObjectElemKeyRule wrapping one (from deserializer). + if isinstance(key, ObjectElemKeyExpressionRule): + width -= 3 + elif isinstance(key, ObjectElemKeyRule) and isinstance( + key.value, ObjectElemKeyExpressionRule + ): + width -= 3 + return width + def _build_newline( self, next_line_indent: int = 0, count: int = 1 ) -> NewLineOrCommentRule: diff --git a/test/integration/hcl2_reconstructed/object_keys.tf b/test/integration/hcl2_reconstructed/object_keys.tf index 6d20581c..002bf6d9 100644 --- a/test/integration/hcl2_reconstructed/object_keys.tf +++ b/test/integration/hcl2_reconstructed/object_keys.tf @@ -2,10 +2,10 @@ bar = { 0 = 0, "foo" = 1, baz = 2, - (var.account) = 3, - (format("key_prefix_%s", local.foo)) = 4, + (var.account) = 3, + (format("key_prefix_%s", local.foo)) = 4, "prefix_${var.account}:${var.user}_suffix" = 5, - 1 + 1 = "two", - (2 + 2) = "four", - format("key_%s", var.name) = "dynamic", + 1 + 1 = "two", + (2 + 2) = "four", + format("key_%s", var.name) = "dynamic", } diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index c0358021..743cf9ac 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -7,9 +7,9 @@ 
block label1 label2 { f = "${"this is a string"}" g = 1 == 2 h = { - k1 = 5, - k2 = 10, - "k3" = { + k1 = 5, + k2 = 10, + "k3" = { k4 = "a", }, (5 + 5) = "d", From deaf0939b199aec1090ad9d89ac5dc87dade9b2c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 23:12:48 +0100 Subject: [PATCH 39/45] remove unused test files --- .../terraform-config-json/backend.json | 40 ------ .../helpers/terraform-config-json/blocks.json | 34 ----- .../terraform-config-json/cloudwatch.json | 28 ---- .../terraform-config-json/data_sources.json | 12 -- .../terraform-config-json/empty-heredoc.json | 1 - .../terraform-config-json/escapes.json | 9 -- test/helpers/terraform-config-json/iam.json | 41 ------ .../locals_embedded_condition.json | 11 -- .../locals_embedded_function.json | 7 - ...locals_embedded_multi_function_nested.json | 8 -- .../multiline_expressions.json | 56 -------- test/helpers/terraform-config-json/nulls.json | 1 - .../provider_function.json | 8 -- .../resource_keyword_attribute.json | 16 --- .../terraform-config-json/route_table.json | 24 ---- test/helpers/terraform-config-json/s3.json | 47 ------- .../string_interpolations.json | 13 -- .../terraform-config-json/test_floats.json | 30 ---- .../unicode_strings.json | 20 --- .../terraform-config-json/variables.json | 117 ---------------- .../terraform-config-json/vars.auto.json | 7 - test/helpers/terraform-config/backend.tf | 31 ----- test/helpers/terraform-config/blocks.tf | 22 --- test/helpers/terraform-config/cloudwatch.tf | 24 ---- test/helpers/terraform-config/data_sources.tf | 8 -- .../terraform-config/empty-heredoc.hcl2 | 2 - test/helpers/terraform-config/escapes.tf | 3 - test/helpers/terraform-config/iam.tf | 37 ----- .../locals_embedded_condition.tf | 7 - .../locals_embedded_function.tf | 3 - .../locals_embedded_multi_function_nested.tf | 6 - .../terraform-config/multiline_expressions.tf | 60 -------- .../terraform-config/provider_function.tf | 4 - .../resource_keyword_attribute.tf | 8 -- 
test/helpers/terraform-config/route_table.tf | 19 --- test/helpers/terraform-config/s3.tf | 36 ----- test/helpers/terraform-config/variables.tf | 129 ------------------ .../helpers/terraform-config/vars.auto.tfvars | 2 - test/helpers/with-meta/data_sources.json | 14 -- test/helpers/with-meta/data_sources.tf | 8 -- 40 files changed, 953 deletions(-) delete mode 100644 test/helpers/terraform-config-json/backend.json delete mode 100644 test/helpers/terraform-config-json/blocks.json delete mode 100644 test/helpers/terraform-config-json/cloudwatch.json delete mode 100644 test/helpers/terraform-config-json/data_sources.json delete mode 100644 test/helpers/terraform-config-json/empty-heredoc.json delete mode 100644 test/helpers/terraform-config-json/escapes.json delete mode 100644 test/helpers/terraform-config-json/iam.json delete mode 100644 test/helpers/terraform-config-json/locals_embedded_condition.json delete mode 100644 test/helpers/terraform-config-json/locals_embedded_function.json delete mode 100644 test/helpers/terraform-config-json/locals_embedded_multi_function_nested.json delete mode 100644 test/helpers/terraform-config-json/multiline_expressions.json delete mode 100644 test/helpers/terraform-config-json/nulls.json delete mode 100644 test/helpers/terraform-config-json/provider_function.json delete mode 100644 test/helpers/terraform-config-json/resource_keyword_attribute.json delete mode 100644 test/helpers/terraform-config-json/route_table.json delete mode 100644 test/helpers/terraform-config-json/s3.json delete mode 100644 test/helpers/terraform-config-json/string_interpolations.json delete mode 100644 test/helpers/terraform-config-json/test_floats.json delete mode 100644 test/helpers/terraform-config-json/unicode_strings.json delete mode 100644 test/helpers/terraform-config-json/variables.json delete mode 100644 test/helpers/terraform-config-json/vars.auto.json delete mode 100644 test/helpers/terraform-config/backend.tf delete mode 100644 
test/helpers/terraform-config/blocks.tf delete mode 100644 test/helpers/terraform-config/cloudwatch.tf delete mode 100644 test/helpers/terraform-config/data_sources.tf delete mode 100644 test/helpers/terraform-config/empty-heredoc.hcl2 delete mode 100644 test/helpers/terraform-config/escapes.tf delete mode 100644 test/helpers/terraform-config/iam.tf delete mode 100644 test/helpers/terraform-config/locals_embedded_condition.tf delete mode 100644 test/helpers/terraform-config/locals_embedded_function.tf delete mode 100644 test/helpers/terraform-config/locals_embedded_multi_function_nested.tf delete mode 100644 test/helpers/terraform-config/multiline_expressions.tf delete mode 100644 test/helpers/terraform-config/provider_function.tf delete mode 100644 test/helpers/terraform-config/resource_keyword_attribute.tf delete mode 100644 test/helpers/terraform-config/route_table.tf delete mode 100644 test/helpers/terraform-config/s3.tf delete mode 100644 test/helpers/terraform-config/variables.tf delete mode 100644 test/helpers/terraform-config/vars.auto.tfvars delete mode 100644 test/helpers/with-meta/data_sources.json delete mode 100644 test/helpers/with-meta/data_sources.tf diff --git a/test/helpers/terraform-config-json/backend.json b/test/helpers/terraform-config-json/backend.json deleted file mode 100644 index 482838c7..00000000 --- a/test/helpers/terraform-config-json/backend.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "provider": [ - { - "aws": { - "region": "${var.region}" - } - }, - { - "aws": { - "region": "${(var.backup_region)}", - "alias": "backup" - } - } - ], - "terraform": [ - { - "required_version": "0.12" - }, - { - "backend": [ - { - "gcs": {} - } - ], - "required_providers": [ - { - "aws": { - "source": "hashicorp/aws" - }, - "null": { - "source": "hashicorp/null" - }, - "template": { - "source": "hashicorp/template" - } - } - ] - } - ] -} diff --git a/test/helpers/terraform-config-json/blocks.json b/test/helpers/terraform-config-json/blocks.json deleted 
file mode 100644 index 716ece56..00000000 --- a/test/helpers/terraform-config-json/blocks.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "block": [ - { - "a": 1 - }, - { - "label": { - "b": 2, - "nested_block_1": [ - { - "a": { - "foo": "bar" - } - }, - { - "a": { - "b": { - "bar": "foo" - } - } - }, - { - "foobar": "barfoo" - } - ], - "nested_block_2": [ - { - "barfoo": "foobar" - } - ] - } - } - ] -} diff --git a/test/helpers/terraform-config-json/cloudwatch.json b/test/helpers/terraform-config-json/cloudwatch.json deleted file mode 100644 index f9dafc99..00000000 --- a/test/helpers/terraform-config-json/cloudwatch.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "resource": [ - { - "aws_cloudwatch_event_rule": { - "aws_cloudwatch_event_rule": { - "name": "name", - "event_pattern": " {\n \"foo\": \"bar\",\n \"foo2\": \"EOF_CONFIG\"\n }" - } - } - }, - { - "aws_cloudwatch_event_rule": { - "aws_cloudwatch_event_rule2": { - "name": "name", - "event_pattern": "{\n \"foo\": \"bar\",\n \"foo2\": \"EOF_CONFIG\"\n}" - } - } - }, - { - "aws_cloudwatch_event_rule": { - "aws_cloudwatch_event_rule2": { - "name": "name", - "event_pattern": "${jsonencode(var.cloudwatch_pattern_deploytool)}" - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/data_sources.json b/test/helpers/terraform-config-json/data_sources.json deleted file mode 100644 index f159c937..00000000 --- a/test/helpers/terraform-config-json/data_sources.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "data": [ - { - "terraform_remote_state": { - "map": { - "for_each": "${{for s3_bucket_key in data.aws_s3_bucket_objects.remote_state_objects.keys : regex(local.remote_state_regex, s3_bucket_key)[\"account_alias\"] => s3_bucket_key if length(regexall(local.remote_state_regex, s3_bucket_key)) > 0}}", - "backend": "s3" - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/empty-heredoc.json b/test/helpers/terraform-config-json/empty-heredoc.json deleted file mode 100644 index c1989c0d..00000000 --- 
a/test/helpers/terraform-config-json/empty-heredoc.json +++ /dev/null @@ -1 +0,0 @@ -{"bar": ""} diff --git a/test/helpers/terraform-config-json/escapes.json b/test/helpers/terraform-config-json/escapes.json deleted file mode 100644 index 41c7d54f..00000000 --- a/test/helpers/terraform-config-json/escapes.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "block": [ - { - "block_with_newlines": { - "a": "line1\nline2" - } - } - ] -} diff --git a/test/helpers/terraform-config-json/iam.json b/test/helpers/terraform-config-json/iam.json deleted file mode 100644 index 8705360e..00000000 --- a/test/helpers/terraform-config-json/iam.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "data": [ - { - "aws_iam_policy_document": { - "policy": { - "statement": [ - { - "effect": "Deny", - "principals": [ - { - "type": "AWS", - "identifiers": [ - "*" - ] - } - ], - "actions": [ - "s3:PutObjectAcl" - ], - "resources": "${aws_s3_bucket.bucket.*.arn.bar}" - } - ] - } - } - }, - { - "aws_iam_policy_document": { - "s3_proxy_policy": { - "statement": [ - { - "actions": [ - "s3:GetObject" - ], - "resources": "${[for bucket_name in local.buckets_to_proxy : \"arn:aws:s3:::${bucket_name}/*\" if substr(bucket_name, 0, 1) == \"l\"]}" - } - ] - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/locals_embedded_condition.json b/test/helpers/terraform-config-json/locals_embedded_condition.json deleted file mode 100644 index 6c41e5e8..00000000 --- a/test/helpers/terraform-config-json/locals_embedded_condition.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "locals": [ - { - "terraform": { - "channels": "${(local.running_in_ci ? 
local.ci_channels : local.local_channels)}", - "authentication": [], - "foo": null - } - } - ] -} diff --git a/test/helpers/terraform-config-json/locals_embedded_function.json b/test/helpers/terraform-config-json/locals_embedded_function.json deleted file mode 100644 index 51cf6454..00000000 --- a/test/helpers/terraform-config-json/locals_embedded_function.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "locals": [ - { - "function_test": "${var.basename}-${var.forwarder_function_name}_${md5(\"${var.vpc_id}${data.aws_region.current.name}\")}" - } - ] -} diff --git a/test/helpers/terraform-config-json/locals_embedded_multi_function_nested.json b/test/helpers/terraform-config-json/locals_embedded_multi_function_nested.json deleted file mode 100644 index f210a087..00000000 --- a/test/helpers/terraform-config-json/locals_embedded_multi_function_nested.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "locals": [ - { - "multi_function": "${substr(split(\"-\", \"us-west-2\")[0], 0, 1)}", - "multi_function_embedded": "${substr(split(\"-\", \"us-west-2\")[0], 0, 1)}" - } - ] -} diff --git a/test/helpers/terraform-config-json/multiline_expressions.json b/test/helpers/terraform-config-json/multiline_expressions.json deleted file mode 100644 index 7f3405c0..00000000 --- a/test/helpers/terraform-config-json/multiline_expressions.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "resource": [ - { - "null_resource": { - "multiline_comment_multiline": { - "triggers": [] - } - } - }, - { - "null_resource": { - "multiline_comment_single_line_before_closing_bracket": { - "triggers": [] - } - } - }, - { - "null_resource": { - "multiline_comment_single_line_between_brackets": { - "triggers": [] - } - } - }, - { - "null_resource": { - "multiline_comment_single_line_after_opening_bracket": { - "triggers": [] - } - } - }, - { - "null_resource": { - "multiline_comment_multiple_single_element": { - "triggers": [ - 2 - ] - } - } - } - ], - "variable": [ - { - "some_var2": { - "description": "description", - "type": 
"string", - "default": "${cidrsubnets(\"10.0.0.0/24\", 2, 2)}" - } - }, - { - "some_var3": { - "description": "description", - "default": "${concat([{\"1\": \"1\"}], [{\"2\": \"2\"}])}" - } - } - ] -} diff --git a/test/helpers/terraform-config-json/nulls.json b/test/helpers/terraform-config-json/nulls.json deleted file mode 100644 index d4a9d448..00000000 --- a/test/helpers/terraform-config-json/nulls.json +++ /dev/null @@ -1 +0,0 @@ -{"terraform": {"unary": "${!null}", "binary": "${(a == null)}", "tuple": [null, 1, 2], "single": null, "conditional": "${null ? null : null}"}} diff --git a/test/helpers/terraform-config-json/provider_function.json b/test/helpers/terraform-config-json/provider_function.json deleted file mode 100644 index 2b749c13..00000000 --- a/test/helpers/terraform-config-json/provider_function.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "locals": [ - { - "name2": "${provider::test2::test(\"a\")}", - "name3": "${test(\"a\")}" - } - ] -} diff --git a/test/helpers/terraform-config-json/resource_keyword_attribute.json b/test/helpers/terraform-config-json/resource_keyword_attribute.json deleted file mode 100644 index 11ff88f9..00000000 --- a/test/helpers/terraform-config-json/resource_keyword_attribute.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "resource": [ - { - "custom_provider_resource": { - "resource_name": { - "name": "resource_name", - "attribute": "attribute_value", - "if" : "attribute_value2", - "in" : "attribute_value3", - "for" : "attribute_value4", - "for_each" : "attribute_value5" - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/route_table.json b/test/helpers/terraform-config-json/route_table.json deleted file mode 100644 index af21a922..00000000 --- a/test/helpers/terraform-config-json/route_table.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "resource": [ - { - "aws_route": { - "tgw": { - "count": "${(var.tgw_name == \"\" ? 
0 : var.number_of_az)}", - "route_table_id": "${aws_route_table.rt[count.index].id}", - "destination_cidr_block": "10.0.0.0/8", - "transit_gateway_id": "${data.aws_ec2_transit_gateway.tgw[0].id}" - } - } - }, - { - "aws_route": { - "tgw-dot-index": { - "count": "${(var.tgw_name == \"\" ? 0 : var.number_of_az)}", - "route_table_id": "${aws_route_table.rt[count.index].id}", - "destination_cidr_block": "10.0.0.0/8", - "transit_gateway_id": "${data.aws_ec2_transit_gateway.tgw[0].id}" - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/s3.json b/test/helpers/terraform-config-json/s3.json deleted file mode 100644 index d3318a21..00000000 --- a/test/helpers/terraform-config-json/s3.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "resource": [ - { - "aws_s3_bucket": { - "name": { - "bucket": "name", - "acl": "log-delivery-write", - "lifecycle_rule": [ - { - "id": "to_glacier", - "prefix": "", - "enabled": true, - "expiration": [ - { - "days": 365 - } - ], - "transition": { - "days": 30, - "storage_class": "GLACIER" - } - } - ], - "versioning": [ - { - "enabled": true - } - ] - } - } - } - ], - "module": [ - { - "bucket_name": { - "source": "s3_bucket_name", - "name": "audit", - "account": "${var.account}", - "region": "${var.region}", - "providers": { - "aws.ue1": "${aws}", - "aws.uw2.attribute": "${aws.backup}" - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/string_interpolations.json b/test/helpers/terraform-config-json/string_interpolations.json deleted file mode 100644 index 885baf89..00000000 --- a/test/helpers/terraform-config-json/string_interpolations.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "locals": [ - { - "simple_interpolation": "prefix:${var.foo}-suffix", - "embedded_interpolation": "(long substring without interpolation); ${module.special_constants.aws_accounts[\"aaa-${local.foo}-${local.bar}\"]}/us-west-2/key_foo", - "deeply_nested_interpolation": "prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}", - "escaped_interpolation": 
"prefix:$${aws:username}-suffix", - "simple_and_escaped": "${\"bar\"}$${baz:bat}", - "simple_and_escaped_reversed": "$${baz:bat}${\"bar\"}", - "nested_escaped": "bar-${\"$${baz:bat}\"}" - } - ] -} diff --git a/test/helpers/terraform-config-json/test_floats.json b/test/helpers/terraform-config-json/test_floats.json deleted file mode 100644 index 87ed65c3..00000000 --- a/test/helpers/terraform-config-json/test_floats.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "locals": [ - { - "simple_float": 123.456, - "small_float": 0.123, - "large_float": 9876543.21, - "negative_float": -42.5, - "negative_small": -0.001, - "scientific_positive": "${1.23e5}", - "scientific_negative": "${9.87e-3}", - "scientific_large": "${6.022e+23}", - "integer_as_float": 100.0, - "float_calculation": "${105e+2 * 3.0 / 2.1}", - "float_comparison": "${5e1 > 2.3 ? 1.0 : 0.0}", - "float_list": [ - 1.1, - 2.2, - 3.3, - -4.4, - "${5.5e2}" - ], - "float_object": { - "pi": 3.14159, - "euler": 2.71828, - "sqrt2": 1.41421, - "scientific": "${-123e+2}" - } - } - ] -} diff --git a/test/helpers/terraform-config-json/unicode_strings.json b/test/helpers/terraform-config-json/unicode_strings.json deleted file mode 100644 index 8eedf932..00000000 --- a/test/helpers/terraform-config-json/unicode_strings.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "locals": [ - { - "basic_unicode": "Hello, 世界! こんにちは Привет नमस्ते", - "unicode_escapes": "© ♥ ♪ ☠ ☺", - "emoji_string": "🚀 🌍 🔥 🎉", - "rtl_text": "English and العربية text mixed", - "complex_unicode": "Python (파이썬) es 很棒的! 
♥ αβγδ", - "ascii": "ASCII: abc123", - "emoji": "Emoji: 🚀🌍🔥🎉", - "math": "Math: ∑∫√∞≠≤≥", - "currency": "Currency: £€¥₹₽₩", - "arrows": "Arrows: ←↑→↓↔↕", - "cjk": "CJK: 你好世界안녕하세요こんにちは", - "cyrillic": "Cyrillic: Привет мир", - "special": "Special: ©®™§¶†‡", - "mixed_content": "Line with interpolation: ${var.name}\nLine with emoji: 👨‍👩‍👧‍👦\nLine with quotes: \"quoted text\"\nLine with backslash: \\escaped" - } - ] -} diff --git a/test/helpers/terraform-config-json/variables.json b/test/helpers/terraform-config-json/variables.json deleted file mode 100644 index d344902c..00000000 --- a/test/helpers/terraform-config-json/variables.json +++ /dev/null @@ -1,117 +0,0 @@ -{ - "variable": [ - { - "region": {} - }, - { - "account": {} - }, - { - "azs": { - "default": { - "us-west-1": "us-west-1c,us-west-1b", - "us-west-2": "us-west-2c,us-west-2b,us-west-2a", - "us-east-1": "us-east-1c,us-east-1b,us-east-1a", - "eu-central-1": "eu-central-1a,eu-central-1b,eu-central-1c", - "sa-east-1": "sa-east-1a,sa-east-1c", - "ap-northeast-1": "ap-northeast-1a,ap-northeast-1c,ap-northeast-1d", - "ap-southeast-1": "ap-southeast-1a,ap-southeast-1b,ap-southeast-1c", - "ap-southeast-2": "ap-southeast-2a,ap-southeast-2b,ap-southeast-2c" - } - } - }, - { - "options": { - "type": "string", - "default": {} - } - }, - { - "var_with_validation": { - "type": "${list(object({\"id\": \"string\", \"nested\": \"${list(object({\"id\": \"string\", \"type\": \"string\"}))}\"}))}", - "validation": [ - { - "condition": "${!contains([for v in flatten(var.var_with_validation[*].id) : can(regex(\"^(A|B)$\", v))], false)}", - "error_message": "The property `id` must be one of value [A, B]." - }, - { - "condition": "${!contains([for v in flatten(var.var_with_validation[*].nested[*].type) : can(regex(\"^(A|B)$\", v))], false)}", - "error_message": "The property `nested.type` must be one of value [A, B]." 
- } - ] - } - } - ], - "locals": [ - { - "foo": "${var.account}_bar", - "bar": { - "baz": 1, - "${(var.account)}": 2, - "${(format(\"key_prefix_%s\", local.foo))}": 3, - "\"prefix_${var.account}:${var.user}_suffix\"": "interpolation" - }, - "tuple": ["${local.foo}"], - "empty_tuple": [] - }, - { - "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {\"aws_account_ids\": \"${[for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}\"} ...}}", - "has_valid_forwarding_rules_template_inputs": "${(length(keys(var.forwarding_rules_template.copy_resolver_rules)) > 0 && length(var.forwarding_rules_template.replace_with_target_ips) > 0 && length(var.forwarding_rules_template.exclude_cidrs) > 0)}", - "for_whitespace": "${{for i in [1, 2, 3] : i => i ...}}" - }, - { - "nested_data": [ - { - "id": 1, - "nested": [ - { - "id": "a", - "again": [ - { - "id": "a1" - }, - { - "id": "b1" - } - ] - }, - { - "id": "c" - } - ] - }, - { - "id": 1, - "nested": [ - { - "id": "a", - "again": [ - { - "id": "a2" - }, - { - "id": "b2" - } - ] - }, - { - "id": "b", - "again": [ - { - "id": "a" - }, - { - "id": "b" - } - ] - } - ] - } - ], - "ids_level_1": "${distinct(local.nested_data[*].id)}", - "ids_level_2": "${flatten(local.nested_data[*].nested[*].id)}", - "ids_level_3": "${flatten(local.nested_data[*].nested[*].again[*][0].foo.bar[0])}", - "bindings_by_role": "${distinct(flatten([for name in local.real_entities : [for role , members in var.bindings : {\"name\": \"${name}\", \"role\": \"${role}\", \"members\": \"${members}\"}]]))}" - } - ] -} diff --git a/test/helpers/terraform-config-json/vars.auto.json b/test/helpers/terraform-config-json/vars.auto.json deleted file mode 100644 index e8ead394..00000000 --- a/test/helpers/terraform-config-json/vars.auto.json +++ 
/dev/null @@ -1,7 +0,0 @@ -{ - "foo": "bar", - "arr": [ - "foo", - "bar" - ] -} diff --git a/test/helpers/terraform-config/backend.tf b/test/helpers/terraform-config/backend.tf deleted file mode 100644 index bd22a869..00000000 --- a/test/helpers/terraform-config/backend.tf +++ /dev/null @@ -1,31 +0,0 @@ -// test new line braces style -provider "aws" -{ - region = var.region -} - -# another comment -provider "aws" { - region = (var.backup_region) - alias = "backup" -} - -/* -one last comment -*/ -terraform { required_version = "0.12" } - -terraform { - backend "gcs" {} - required_providers { - aws = { - source = "hashicorp/aws", - } - null = { - source = "hashicorp/null", - } - template = { - source = "hashicorp/template", - } - } -} diff --git a/test/helpers/terraform-config/blocks.tf b/test/helpers/terraform-config/blocks.tf deleted file mode 100644 index bd8e5159..00000000 --- a/test/helpers/terraform-config/blocks.tf +++ /dev/null @@ -1,22 +0,0 @@ -block { - a = 1 -} - -block "label" { - b = 2 - nested_block_1 "a" { - foo = "bar" - } - - nested_block_1 "a" "b" { - bar = "foo" - } - - nested_block_1 { - foobar = "barfoo" - } - - nested_block_2 { - barfoo = "foobar" - } -} diff --git a/test/helpers/terraform-config/cloudwatch.tf b/test/helpers/terraform-config/cloudwatch.tf deleted file mode 100644 index 8928b810..00000000 --- a/test/helpers/terraform-config/cloudwatch.tf +++ /dev/null @@ -1,24 +0,0 @@ -resource "aws_cloudwatch_event_rule" "aws_cloudwatch_event_rule" { - name = "name" - event_pattern = < s3_bucket_key - if length(regexall(local.remote_state_regex, s3_bucket_key)) > 0 - } - backend = "s3" -} diff --git a/test/helpers/terraform-config/empty-heredoc.hcl2 b/test/helpers/terraform-config/empty-heredoc.hcl2 deleted file mode 100644 index c701dac2..00000000 --- a/test/helpers/terraform-config/empty-heredoc.hcl2 +++ /dev/null @@ -1,2 +0,0 @@ -bar = < { - aws_account_ids = [ - for account_name in var.route53_resolver_forwarding_rule_shares[ - 
forwarding_rule_key - ].aws_account_names : - module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] - ] - } - ... - } - has_valid_forwarding_rules_template_inputs = ( - length(keys(var.forwarding_rules_template.copy_resolver_rules)) > 0 - && length(var.forwarding_rules_template.replace_with_target_ips) > 0 && - length(var.forwarding_rules_template.exclude_cidrs) > 0 - ) - - for_whitespace = { for i in [1, 2, 3] : - i => - i ... - } -} - -locals { - nested_data = [ - { - id = 1, - nested = [ - { - id = "a" - again = [ - { id = "a1" }, - { id = "b1" } - ] - }, - { id = "c" } - ] - }, - { - id = 1 - nested = [ - { - id = "a" - again = [ - { id = "a2" }, - { id = "b2" } - ] - }, - { - id = "b" - again = [ - { id = "a" }, - { id = "b" } - ] - } - ] - } - ] - - ids_level_1 = distinct(local.nested_data[*].id) - ids_level_2 = flatten(local.nested_data[*].nested[*].id) - ids_level_3 = flatten(local.nested_data[*].nested[*].again[*][0].foo.bar[0]) - bindings_by_role = distinct(flatten([ - for name in local.real_entities - : [ - for role, members in var.bindings - : { name = name, role = role, members = members } - ] - ])) -} diff --git a/test/helpers/terraform-config/vars.auto.tfvars b/test/helpers/terraform-config/vars.auto.tfvars deleted file mode 100644 index 9fd3a49d..00000000 --- a/test/helpers/terraform-config/vars.auto.tfvars +++ /dev/null @@ -1,2 +0,0 @@ -foo = "bar" -arr = ["foo", "bar"] diff --git a/test/helpers/with-meta/data_sources.json b/test/helpers/with-meta/data_sources.json deleted file mode 100644 index f04e0ff9..00000000 --- a/test/helpers/with-meta/data_sources.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "data": [ - { - "terraform_remote_state": { - "map": { - "for_each": "${{for s3_bucket_key in data.aws_s3_bucket_objects.remote_state_objects.keys : regex(local.remote_state_regex, s3_bucket_key)[\"account_alias\"] => s3_bucket_key if length(regexall(local.remote_state_regex, s3_bucket_key)) > 0}}", - "backend": "s3", - 
"__start_line__": 1, - "__end_line__": 8 - } - } - } - ] -} diff --git a/test/helpers/with-meta/data_sources.tf b/test/helpers/with-meta/data_sources.tf deleted file mode 100644 index 8e4cc25a..00000000 --- a/test/helpers/with-meta/data_sources.tf +++ /dev/null @@ -1,8 +0,0 @@ -data "terraform_remote_state" "map" { - for_each = { - for s3_bucket_key in data.aws_s3_bucket_objects.remote_state_objects.keys : - regex(local.remote_state_regex, s3_bucket_key)["account_alias"] => s3_bucket_key - if length(regexall(local.remote_state_regex, s3_bucket_key)) > 0 - } - backend = "s3" -} From 4c08d6eb0b04e824097eba39aaea2950177b5a90 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 23:14:28 +0100 Subject: [PATCH 40/45] fix some minor issues; add more cli tests --- hcl2/formatter.py | 3 +- hcl2/rules/base.py | 4 +- hcl2/rules/expressions.py | 8 +++- hcl2/transformer.py | 4 -- hcl2/utils.py | 7 ---- test/unit/cli/test_hcl_to_json.py | 64 ++++++++++++++++++++++++++++++- test/unit/cli/test_json_to_hcl.py | 61 +++++++++++++++++++++++++++++ test/unit/test_api.py | 24 +++++++++++- test/unit/test_utils.py | 7 ---- 9 files changed, 155 insertions(+), 27 deletions(-) diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 1b0702c9..29a9c6f6 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -293,7 +293,8 @@ def _build_newline( return result def _deindent_last_line(self, times: int = 1): - assert self._last_new_line is not None + if self._last_new_line is None: + return token = self._last_new_line.token for _ in range(times): if token.value.endswith(" " * self.options.indent_length): diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index 26a31247..cf129ce1 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -10,7 +10,7 @@ from hcl2.rules.expressions import ExprTermRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule -from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE +from hcl2.rules.tokens import EQ, 
LBRACE, RBRACE from hcl2.rules.whitespace import NewLineOrCommentRule from hcl2.utils import SerializationOptions, SerializationContext @@ -148,7 +148,7 @@ def lark_name() -> str: return "block" @property - def labels(self) -> List[NAME]: + def labels(self) -> List[Union[IdentifierRule, StringRule]]: """Return the block label chain (type name, optional string labels).""" return list(filter(lambda label: label is not None, self._labels)) diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py index e6aa1670..c29859a3 100644 --- a/hcl2/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -27,8 +27,8 @@ class ExpressionRule(InlineCommentMixIn, ABC): @staticmethod def lark_name() -> str: - """Return the grammar rule name.""" - return "expression" + """?expression is transparent in Lark — subclasses must override.""" + raise NotImplementedError("ExpressionRule.lark_name() must be overridden") def __init__( self, children, meta: Optional[Meta] = None, parentheses: bool = False @@ -221,6 +221,10 @@ class BinaryOpRule(ExpressionRule): Optional[NewLineOrCommentRule], ] + def __init__(self, children, meta: Optional[Meta] = None): + self._insert_optionals(children, [2]) + super().__init__(children, meta) + @staticmethod def lark_name() -> str: """Return the grammar rule name.""" diff --git a/hcl2/transformer.py b/hcl2/transformer.py index d483cd90..d0a09630 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -217,10 +217,6 @@ def arguments(self, meta: Meta, args) -> ArgumentsRule: def function_call(self, meta: Meta, args) -> FunctionCallRule: return FunctionCallRule(args, meta) - # @v_args(meta=True) - # def provider_function_call(self, meta: Meta, args) -> ProviderFunctionCallRule: - # return ProviderFunctionCallRule(args, meta) - @v_args(meta=True) def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: return IndexExprTermRule(args, meta) diff --git a/hcl2/utils.py b/hcl2/utils.py index 4eb31470..7e349558 100644 --- a/hcl2/utils.py +++ 
b/hcl2/utils.py @@ -2,7 +2,6 @@ import re from contextlib import contextmanager from dataclasses import dataclass, replace -from typing import Generator HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) @@ -33,12 +32,6 @@ def replace(self, **kwargs) -> "SerializationContext": """Return a new context with the given fields overridden.""" return replace(self, **kwargs) - @contextmanager - def copy(self, **kwargs) -> Generator["SerializationContext", None, None]: - """Context manager that yields a modified copy of the context""" - modified_context = self.replace(**kwargs) - yield modified_context - @contextmanager def modify(self, **kwargs): """Context manager that temporarily mutates fields, restoring on exit.""" diff --git a/test/unit/cli/test_hcl_to_json.py b/test/unit/cli/test_hcl_to_json.py index 0b40c896..1615487f 100644 --- a/test/unit/cli/test_hcl_to_json.py +++ b/test/unit/cli/test_hcl_to_json.py @@ -185,8 +185,10 @@ def test_skip_error_with_output_file(self): with patch("sys.argv", ["hcl2tojson", "-s", in_path, out_path]): main() - if os.path.exists(out_path): - self.assertEqual(_read_file(out_path), "") + # The output file is created (opened for writing) before + # conversion; on a skipped error it will be empty. 
+ self.assertTrue(os.path.exists(out_path)) + self.assertEqual(_read_file(out_path), "") def test_raise_error_with_output_file(self): with tempfile.TemporaryDirectory() as tmpdir: @@ -222,6 +224,64 @@ def test_raise_error_to_stdout(self): main() +class TestHclToJsonFlags(TestCase): + def _run_hcl_to_json(self, hcl_content, extra_flags=None): + """Helper: write HCL to a temp file, run main() with flags, return parsed JSON.""" + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, hcl_content) + + stdout = StringIO() + argv = ["hcl2tojson"] + (extra_flags or []) + [hcl_path] + with patch("sys.argv", argv): + with patch("sys.stdout", stdout): + main() + return json.loads(stdout.getvalue()) + + def test_no_explicit_blocks_flag(self): + hcl = 'resource "a" "b" {\n x = 1\n}\n' + default = self._run_hcl_to_json(hcl) + no_blocks = self._run_hcl_to_json(hcl, ["--no-explicit-blocks"]) + # With explicit blocks, the value is wrapped in a list; without, it may differ + self.assertNotEqual(default, no_blocks) + + def test_no_preserve_heredocs_flag(self): + hcl = "x = < Date: Sat, 7 Mar 2026 23:34:47 +0100 Subject: [PATCH 41/45] fix another bunch of issues --- hcl2/deserializer.py | 7 ++-- hcl2/formatter.py | 38 +++++++++++--------- hcl2/reconstructor.py | 18 ---------- hcl2/rules/containers.py | 2 +- hcl2/rules/indexing.py | 8 ++--- hcl2/rules/strings.py | 8 +++-- test/integration/hcl2_reconstructed/smoke.tf | 2 +- test/unit/rules/test_containers.py | 4 ++- test/unit/test_deserializer.py | 4 +-- test/unit/test_formatter.py | 37 +++++++++++++++++++ 10 files changed, 80 insertions(+), 48 deletions(-) diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index ae6cbc15..212dcc6f 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -317,14 +317,17 @@ def _deserialize_object(self, value: dict) -> ObjectRule: return ObjectRule([LBRACE(), *children, RBRACE()]) def _deserialize_object_elem(self, key: Any, value: 
Any) -> ObjectElemRule: + key_rule: Union[ObjectElemKeyExpressionRule, ObjectElemKeyRule] + if self._is_expression(key): expr = self._deserialize_expression(key) - key = ObjectElemKeyExpressionRule([expr]) + key_rule = ObjectElemKeyExpressionRule([expr]) else: key = self._deserialize_text(key) + key_rule = ObjectElemKeyRule([key]) result = [ - ObjectElemKeyRule([key]), + key_rule, COLON() if self.options.object_elements_colon else EQ(), ExprTermRule([self._deserialize(value)]), ] diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 29a9c6f6..c1bac9df 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -3,7 +3,7 @@ from dataclasses import dataclass from typing import List, Optional -from hcl2.rules.abstract import LarkElement +from hcl2.rules.abstract import LarkElement, LarkRule from hcl2.rules.base import ( StartRule, BlockRule, @@ -13,7 +13,6 @@ from hcl2.rules.containers import ( ObjectRule, ObjectElemRule, - ObjectElemKeyRule, ObjectElemKeyExpressionRule, TupleRule, ) @@ -104,7 +103,7 @@ def format_body_rule(self, rule: BodyRule, indent_level: int = 0): if new_children: new_children.pop(-1) - rule._children = new_children + self._set_children(rule, new_children) def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): """Format an attribute rule by formatting its value expression.""" @@ -127,7 +126,7 @@ def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): new_children.append(self._build_newline(indent_level)) self._deindent_last_line() - rule._children = new_children + self._set_children(rule, new_children) def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): """Format an object rule with one element per line and optional alignment.""" @@ -157,7 +156,7 @@ def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): new_children.insert(-1, self._build_newline(indent_level)) self._deindent_last_line() - rule._children = new_children + self._set_children(rule, new_children) if 
self.options.vertically_align_object_elements: self._vertically_align_object_elems(rule) @@ -215,8 +214,10 @@ def format_forobjectexpr( for index in [1, 3]: expression.children[index] = self._build_newline(indent_level) - expression.children[6] = None - expression.children[8] = None + for index in [6, 8]: + child = expression.children[index] + if not isinstance(child, NewLineOrCommentRule) or child.to_list() is None: + expression.children[index] = None if expression.condition is not None: expression.children[10] = self._build_newline(indent_level) @@ -226,6 +227,15 @@ def format_forobjectexpr( expression.children[12] = self._build_newline(indent_level) self._deindent_last_line() + @staticmethod + def _set_children(rule: LarkRule, new_children): + """Replace a rule's children and re-establish parent/index links.""" + rule._children = new_children + for i, child in enumerate(new_children): + if child is not None: + child.set_index(i) + child.set_parent(rule) + def _vertically_align_attributes_in_body(self, body: BodyRule): attributes_sequence: List[AttributeRule] = [] @@ -247,9 +257,8 @@ def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): for attribute in attributes_sequence: name_length = len(attribute.identifier.token.value) spaces_to_add = max_length - name_length - attribute.children[1].set_value( - " " * spaces_to_add + attribute.children[1].value - ) + base = attribute.children[1].value.lstrip(" ") + attribute.children[1].set_value(" " * spaces_to_add + base) def _vertically_align_object_elems(self, rule: ObjectRule): max_length = max(self._key_text_width(elem.key) for elem in rule.elements) @@ -262,21 +271,16 @@ def _vertically_align_object_elems(self, rule: ObjectRule): if isinstance(separator, COLON): # type: ignore[misc] spaces_to_add += 1 - elem.children[1].set_value(" " * spaces_to_add + separator.value) + base = separator.value.lstrip(" ") + elem.children[1].set_value(" " * spaces_to_add + base) @staticmethod def 
_key_text_width(key: LarkElement) -> int: """Compute the HCL text width of an object element key.""" width = len(str(key.serialize())) # Expression keys serialize with ${...} wrapping (+3 chars vs HCL text). - # Handle both direct ObjectElemKeyExpressionRule (from parser) and - # ObjectElemKeyRule wrapping one (from deserializer). if isinstance(key, ObjectElemKeyExpressionRule): width -= 3 - elif isinstance(key, ObjectElemKeyRule) and isinstance( - key.value, ObjectElemKeyExpressionRule - ): - width -= 3 return width def _build_newline( diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index b9f3b3ce..fd437347 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -38,9 +38,6 @@ def __init__(self): self._current_indent = 0 self._last_token_name: Optional[str] = None self._last_rule_name: Optional[str] = None - self._in_parentheses = False - self._in_object = False - self._in_tuple = False def _reset_state(self): """Reset state tracking for formatting decisions.""" @@ -48,9 +45,6 @@ def _reset_state(self): self._current_indent = 0 self._last_token_name = None self._last_rule_name = None - self._in_parentheses = False - self._in_object = False - self._in_tuple = False # pylint:disable=R0911,R0912 def _should_add_space_before( @@ -178,21 +172,9 @@ def _reconstruct_tree( self._last_was_space = True elif rule_name == ExprTermRule.lark_name(): - # Check if parenthesized - if ( - len(tree.children) >= 3 - and isinstance(tree.children[0], Token) - and tree.children[0].type == tokens.LPAR.lark_name() - and isinstance(tree.children[-1], Token) - and tree.children[-1].type == tokens.RPAR.lark_name() - ): - self._in_parentheses = True - for child in tree.children: result.extend(self._reconstruct_node(child, rule_name)) - self._in_parentheses = False - else: for child in tree.children: result.extend(self._reconstruct_node(child, rule_name)) diff --git a/hcl2/rules/containers.py b/hcl2/rules/containers.py index 1ca5f69b..671d98b7 100644 --- 
a/hcl2/rules/containers.py +++ b/hcl2/rules/containers.py @@ -124,7 +124,7 @@ class ObjectElemKeyExpressionRule(LarkRule): @staticmethod def lark_name() -> str: """Return the grammar rule name.""" - return "object_elem_key" + return "object_elem_key_expr" @property def expression(self) -> ExpressionRule: diff --git a/hcl2/rules/indexing.py b/hcl2/rules/indexing.py index 5cfefb96..455ae6ef 100644 --- a/hcl2/rules/indexing.py +++ b/hcl2/rules/indexing.py @@ -47,7 +47,7 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: """Serialize to '.N' string.""" - return f".{self.index.serialize(options)}" + return f".{self.index.serialize(options, context)}" class SqbIndexRule(InlineCommentMixIn): @@ -75,7 +75,7 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: """Serialize to '[expr]' string.""" - return f"[{self.index_expression.serialize(options)}]" + return f"[{self.index_expression.serialize(options, context)}]" def __init__(self, children, meta: Optional[Meta] = None): self._insert_optionals(children, [1, 3]) @@ -97,8 +97,8 @@ def serialize( ) -> Any: """Serialize to 'expr[index]' string.""" with context.modify(inside_dollar_string=True): - expr = self.children[0].serialize(options) - index = self.children[1].serialize(options) + expr = self.children[0].serialize(options, context) + index = self.children[1].serialize(options, context) result = f"{expr}{index}" if not context.inside_dollar_string: result = to_dollar_string(result) diff --git a/hcl2/rules/strings.py b/hcl2/rules/strings.py index c56e6e79..0303adfb 100644 --- a/hcl2/rules/strings.py +++ b/hcl2/rules/strings.py @@ -46,7 +46,7 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: """Serialize to ${expression} string.""" - return to_dollar_string(self.expression.serialize(options)) + return to_dollar_string(self.expression.serialize(options, context)) class 
StringPartRule(LarkRule): @@ -92,7 +92,11 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: """Serialize to a quoted string.""" - return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + return ( + '"' + + "".join(part.serialize(options, context) for part in self.string_parts) + + '"' + ) class HeredocTemplateRule(LarkRule): diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 743cf9ac..d05e4a4f 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -43,7 +43,7 @@ block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : "${forwarding_rule_key}" => { - aws_account_ids = [ + aws_account_ids = [ for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] ] diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py index 0231987d..526b0216 100644 --- a/test/unit/rules/test_containers.py +++ b/test/unit/rules/test_containers.py @@ -184,7 +184,9 @@ def test_serialize_string(self): class TestObjectElemKeyExpressionRule(TestCase): def test_lark_name(self): - self.assertEqual(ObjectElemKeyExpressionRule.lark_name(), "object_elem_key") + self.assertEqual( + ObjectElemKeyExpressionRule.lark_name(), "object_elem_key_expr" + ) def test_expression_property(self): expr = StubExpression("1 + 1") diff --git a/test/unit/test_deserializer.py b/test/unit/test_deserializer.py index 8ae7db67..54ecfa1a 100644 --- a/test/unit/test_deserializer.py +++ b/test/unit/test_deserializer.py @@ -371,14 +371,14 @@ def test_expression_key_object_element(self): result = d._deserialize_object_elem("${(var.key)}", 1) self.assertIsInstance(result, ObjectElemRule) key_rule = result.key - 
self.assertIsInstance(key_rule.value, ObjectElemKeyExpressionRule) + self.assertIsInstance(key_rule, ObjectElemKeyExpressionRule) def test_bare_expression_key_object_element(self): d = _deser() result = d._deserialize_object_elem("${1 + 1}", 1) self.assertIsInstance(result, ObjectElemRule) key_rule = result.key - self.assertIsInstance(key_rule.value, ObjectElemKeyExpressionRule) + self.assertIsInstance(key_rule, ObjectElemKeyExpressionRule) def test_object_elem_value_is_expr_term(self): d = _deser() diff --git a/test/unit/test_formatter.py b/test/unit/test_formatter.py index 0de4eadd..eceb1f65 100644 --- a/test/unit/test_formatter.py +++ b/test/unit/test_formatter.py @@ -790,3 +790,40 @@ def test_for_cond_expression_formatting(self): nlc_count = sum(1 for c in obj._children if isinstance(c, NewLineOrCommentRule)) self.assertGreater(nlc_count, 0) + + +# --- alignment idempotency --- + + +class TestAlignmentIdempotency(TestCase): + """Alignment must not double-pad when applied multiple times (#7).""" + + def test_attribute_alignment_does_not_double_pad(self): + """Running _vertically_align_attributes_in_body twice produces same padding.""" + f = _fmt() + attr_short = _make_attribute("a", "x") + attr_long = _make_attribute("long_name", "y") + body = BodyRule([attr_short, attr_long]) + + f._vertically_align_attributes_in_body(body) + eq_val_first = attr_short.children[1].value + + f._vertically_align_attributes_in_body(body) + eq_val_second = attr_short.children[1].value + + self.assertEqual(eq_val_first, eq_val_second) + + def test_object_elem_alignment_does_not_double_pad(self): + """Running _vertically_align_object_elems twice produces same padding.""" + f = _fmt() + elem_short = _make_object_elem("a", "x") + elem_long = _make_object_elem("long_key", "y") + obj = _make_object([elem_short, elem_long], trailing_commas=False) + + f._vertically_align_object_elems(obj) + sep_val_first = elem_short.children[1].value + + f._vertically_align_object_elems(obj) + 
sep_val_second = elem_short.children[1].value + + self.assertEqual(sep_val_first, sep_val_second) From e893e7d65b79bae79e0ecd22b524ec9068c7761b Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 23:55:17 +0100 Subject: [PATCH 42/45] fix another bunch of issues --- cli/hcl_to_json.py | 1 - hcl2/reconstructor.py | 28 +++++++++++++++--- hcl2/rules/base.py | 2 +- hcl2/rules/for_expressions.py | 3 +- hcl2/rules/indexing.py | 3 +- hcl2/rules/whitespace.py | 7 ++--- test/integration/hcl2_reconstructed/smoke.tf | 4 +-- test/unit/cli/test_hcl_to_json.py | 30 ++++++++++++++++++++ test/unit/rules/test_base.py | 7 +++++ test/unit/rules/test_for_expressions.py | 24 ++++++++++++++++ test/unit/rules/test_whitespace.py | 12 ++++++++ 11 files changed, 107 insertions(+), 14 deletions(-) diff --git a/cli/hcl_to_json.py b/cli/hcl_to_json.py index d4acf0e5..108c7b34 100644 --- a/cli/hcl_to_json.py +++ b/cli/hcl_to_json.py @@ -111,7 +111,6 @@ def main(): def convert(in_file, out_file): _hcl_to_json(in_file, out_file, options, json_indent=json_indent) - out_file.write("\n") if args.PATH == "-": _convert_stdin(convert) diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index fd437347..4760665a 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -4,7 +4,7 @@ from lark import Tree, Token from hcl2.rules import tokens from hcl2.rules.base import BlockRule -from hcl2.rules.for_expressions import ForIntroRule +from hcl2.rules.for_expressions import ForIntroRule, ForTupleExprRule, ForObjectExprRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule from hcl2.rules.expressions import ( @@ -71,9 +71,8 @@ def _should_add_space_before( return True # Space around Conditional Expression operators - if ( - parent_rule_name == ConditionalRule.lark_name() - and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + if parent_rule_name == ConditionalRule.lark_name() and ( + token_type in 
[tokens.COLON.lark_name(), tokens.QMARK.lark_name()] or self._last_token_name in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] ): @@ -151,6 +150,27 @@ def _should_add_space_before( ]: return True + # Space after QMARK/COLON in conditional expressions + if ( + parent_rule_name == ConditionalRule.lark_name() + and self._last_token_name + in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + ): + return True + + # Space after colon in for expressions (before value expression, + # but not before newline/comment which provides its own whitespace) + if ( + self._last_token_name == tokens.COLON.lark_name() + and parent_rule_name + in [ + ForTupleExprRule.lark_name(), + ForObjectExprRule.lark_name(), + ] + and rule_name != "new_line_or_comment" + ): + return True + return False def _reconstruct_tree( diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index cf129ce1..edbca7a7 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -82,7 +82,7 @@ def serialize( result[name].append(child.serialize(options)) if isinstance(child, AttributeRule): - attribute_names.add(child) + attribute_names.add(child.identifier.serialize(options)) result.update(child.serialize(options)) if options.with_comments: # collect in-line comments from attribute assignments, expressions etc diff --git a/hcl2/rules/for_expressions.py b/hcl2/rules/for_expressions.py index a062e66a..eb018343 100644 --- a/hcl2/rules/for_expressions.py +++ b/hcl2/rules/for_expressions.py @@ -1,5 +1,6 @@ """Rule classes for HCL2 for-tuple and for-object expressions.""" +from dataclasses import replace from typing import Any, Tuple, Optional, List from lark.tree import Meta @@ -306,7 +307,7 @@ def serialize( result += f"{self.key_expr.serialize(options, context)} => " result += self.value_expr.serialize( - SerializationOptions(wrap_objects=True), context + replace(options, wrap_objects=True), context ) if self.ellipsis is not None: result += self.ellipsis.serialize(options, context) diff --git 
a/hcl2/rules/indexing.py b/hcl2/rules/indexing.py index 455ae6ef..4cc292c0 100644 --- a/hcl2/rules/indexing.py +++ b/hcl2/rules/indexing.py @@ -13,6 +13,7 @@ LSQB, RSQB, ATTR_SPLAT, + FULL_SPLAT, ) from hcl2.rules.whitespace import ( InlineCommentMixIn, @@ -233,7 +234,7 @@ class FullSplatRule(LarkRule): """Rule for full splat expressions (e.g. [*].attr).""" _children_layout: Tuple[ - ATTR_SPLAT, + FULL_SPLAT, Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], ] diff --git a/hcl2/rules/whitespace.py b/hcl2/rules/whitespace.py index 8591fd20..6b28837e 100644 --- a/hcl2/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -43,12 +43,11 @@ def to_list( comment = comment.strip() for delimiter in ("//", "/*", "#"): - if comment.startswith(delimiter): comment = comment[len(delimiter) :] - - if comment.endswith("*/"): - comment = comment[:-2] + if delimiter == "/*" and comment.endswith("*/"): + comment = comment[:-2] + break if comment != "": result.append(comment.strip()) diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index d05e4a4f..c8529e70 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -41,10 +41,10 @@ block label1 label2 { block { route53_forwarding_rule_shares = { - for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : "${forwarding_rule_key}" => { aws_account_ids = [ - for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : + for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] ] } ... 
diff --git a/test/unit/cli/test_hcl_to_json.py b/test/unit/cli/test_hcl_to_json.py index 1615487f..5c7e6f3a 100644 --- a/test/unit/cli/test_hcl_to_json.py +++ b/test/unit/cli/test_hcl_to_json.py @@ -49,6 +49,23 @@ def test_single_file_to_output(self): result = json.loads(_read_file(out_path)) self.assertEqual(result["x"], 1) + def test_single_file_to_stdout_single_trailing_newline(self): + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, SIMPLE_HCL) + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", hcl_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + self.assertTrue(output.endswith("\n"), "output should end with newline") + self.assertFalse( + output.endswith("\n\n"), + "output should not have double trailing newline", + ) + def test_stdin(self): stdout = StringIO() stdin = StringIO(SIMPLE_HCL) @@ -59,6 +76,19 @@ def test_stdin(self): result = json.loads(stdout.getvalue()) self.assertEqual(result["x"], 1) + def test_stdin_single_trailing_newline(self): + stdout = StringIO() + stdin = StringIO(SIMPLE_HCL) + with patch("sys.argv", ["hcl2tojson", "-"]): + with patch("sys.stdin", stdin), patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + self.assertTrue(output.endswith("\n"), "output should end with newline") + self.assertFalse( + output.endswith("\n\n"), "output should not have double trailing newline" + ) + def test_directory_mode(self): with tempfile.TemporaryDirectory() as tmpdir: in_dir = os.path.join(tmpdir, "input") diff --git a/test/unit/rules/test_base.py b/test/unit/rules/test_base.py index bcf240a8..4dc51f92 100644 --- a/test/unit/rules/test_base.py +++ b/test/unit/rules/test_base.py @@ -168,6 +168,13 @@ def test_serialize_bare_newlines_not_collected_as_comments(self): result = body.serialize(options=SerializationOptions(with_comments=True)) self.assertNotIn("__comments__", result) + def 
test_serialize_raises_when_block_name_collides_with_attribute(self): + attr = _make_attribute("resource", "value") + block = _make_block([_make_identifier("resource")]) + body = BodyRule([attr, block]) + with self.assertRaises(RuntimeError): + body.serialize() + def test_serialize_skips_newline_children(self): nlc = _make_nlc("\n") attr = _make_attribute("x", 1) diff --git a/test/unit/rules/test_for_expressions.py b/test/unit/rules/test_for_expressions.py index 0691d81c..38cb90ea 100644 --- a/test/unit/rules/test_for_expressions.py +++ b/test/unit/rules/test_for_expressions.py @@ -34,9 +34,11 @@ class StubExpression(ExpressionRule): def __init__(self, value): self._stub_value = value + self._last_options = None super().__init__([], None) def serialize(self, options=SerializationOptions(), context=SerializationContext()): + self._last_options = options return self._stub_value @@ -390,3 +392,25 @@ def test_serialize_with_condition(self): result = rule.serialize() self.assertIn("if cond", result) self.assertEqual(result, "${{for k, v in items : key => value if cond}}") + + def test_serialize_preserves_caller_options(self): + value_expr = StubExpression("value") + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + value_expr, + RBRACE(), + ] + ) + caller_options = SerializationOptions( + with_comments=True, preserve_heredocs=False + ) + rule.serialize(options=caller_options) + # value_expr should receive options with wrap_objects=True but + # all other caller settings preserved + self.assertTrue(value_expr._last_options.wrap_objects) + self.assertTrue(value_expr._last_options.with_comments) + self.assertFalse(value_expr._last_options.preserve_heredocs) diff --git a/test/unit/rules/test_whitespace.py b/test/unit/rules/test_whitespace.py index 351ca82c..49fde824 100644 --- a/test/unit/rules/test_whitespace.py +++ b/test/unit/rules/test_whitespace.py @@ -61,6 +61,18 @@ def 
test_to_list_block_comment(self): result = rule.to_list() self.assertEqual(result, ["block comment"]) + def test_to_list_line_comment_ending_in_block_close(self): + """A // comment ending in */ should preserve the */ suffix.""" + rule = _make_nlc("// comment ending in */\n") + result = rule.to_list() + self.assertEqual(result, ["comment ending in */"]) + + def test_to_list_hash_comment_ending_in_block_close(self): + """A # comment ending in */ should preserve the */ suffix.""" + rule = _make_nlc("# comment ending in */\n") + result = rule.to_list() + self.assertEqual(result, ["comment ending in */"]) + def test_to_list_multiple_comments(self): rule = _make_nlc("// first\n// second\n") result = rule.to_list() From 0c8a1c327d590c61b55380dcc260800d03fadf6b Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 8 Mar 2026 00:22:54 +0100 Subject: [PATCH 43/45] fix another bunch of issues --- cli/hcl_to_json.py | 2 +- cli/helpers.py | 4 ++++ cli/json_to_hcl.py | 2 +- hcl2/const.py | 1 + hcl2/deserializer.py | 4 ++-- hcl2/rules/base.py | 4 ++-- hcl2/rules/functions.py | 20 ----------------- hcl2/rules/strings.py | 2 +- test/unit/cli/test_hcl_to_json.py | 6 ++--- test/unit/rules/test_functions.py | 37 ------------------------------- test/unit/rules/test_strings.py | 2 +- test/unit/test_deserializer.py | 6 ++++- 12 files changed, 20 insertions(+), 70 deletions(-) diff --git a/cli/hcl_to_json.py b/cli/hcl_to_json.py index 108c7b34..7e9f7275 100644 --- a/cli/hcl_to_json.py +++ b/cli/hcl_to_json.py @@ -129,4 +129,4 @@ def convert(in_file, out_file): out_extension=".json", ) else: - raise RuntimeError("Invalid Path", args.PATH) + raise RuntimeError(f"Invalid Path: {args.PATH}") diff --git a/cli/helpers.py b/cli/helpers.py index 6d463d45..b7d48376 100644 --- a/cli/helpers.py +++ b/cli/helpers.py @@ -26,6 +26,8 @@ def _convert_single_file( convert_fn(in_file, out_file) except skippable: if skip: + if os.path.exists(out_path): + os.remove(out_path) return raise else: @@ -83,6 +85,8 
@@ def _convert_directory( convert_fn(in_file, out_file) except skippable: if skip: + if os.path.exists(out_file_path): + os.remove(out_file_path) continue raise diff --git a/cli/json_to_hcl.py b/cli/json_to_hcl.py index 65caeb09..826b7796 100644 --- a/cli/json_to_hcl.py +++ b/cli/json_to_hcl.py @@ -133,4 +133,4 @@ def convert(in_file, out_file): out_extension=".tf", ) else: - raise RuntimeError("Invalid Path", args.PATH) + raise RuntimeError(f"Invalid Path: {args.PATH}") diff --git a/hcl2/const.py b/hcl2/const.py index c36a5321..555c56aa 100644 --- a/hcl2/const.py +++ b/hcl2/const.py @@ -2,3 +2,4 @@ IS_BLOCK = "__is_block__" COMMENTS_KEY = "__comments__" +INLINE_COMMENTS_KEY = "__inline_comments__" diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 212dcc6f..5043985a 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -9,7 +9,7 @@ from regex import regex from hcl2.parser import parser as _get_parser -from hcl2.const import IS_BLOCK, COMMENTS_KEY +from hcl2.const import IS_BLOCK, COMMENTS_KEY, INLINE_COMMENTS_KEY from hcl2.rules.abstract import LarkElement, LarkRule from hcl2.rules.base import ( BlockRule, @@ -336,7 +336,7 @@ def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: def _is_reserved_key(self, key: str) -> bool: """Check if a key is a reserved metadata key that should be skipped during deserialization.""" - return key in (IS_BLOCK, COMMENTS_KEY) + return key in (IS_BLOCK, COMMENTS_KEY, INLINE_COMMENTS_KEY) def _is_expression(self, value: Any) -> bool: return isinstance(value, str) and value.startswith("${") and value.endswith("}") diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index edbca7a7..540d6284 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -5,7 +5,7 @@ from lark.tree import Meta -from hcl2.const import IS_BLOCK +from hcl2.const import IS_BLOCK, INLINE_COMMENTS_KEY from hcl2.rules.abstract import LarkRule, LarkToken from hcl2.rules.expressions import ExprTermRule from 
hcl2.rules.literal_rules import IdentifierRule @@ -97,7 +97,7 @@ def serialize( if comments: result["__comments__"] = comments if inline_comments: - result["__inline_comments__"] = inline_comments + result[INLINE_COMMENTS_KEY] = inline_comments return result diff --git a/hcl2/rules/functions.py b/hcl2/rules/functions.py index e9722965..bd574ebe 100644 --- a/hcl2/rules/functions.py +++ b/hcl2/rules/functions.py @@ -111,23 +111,3 @@ def serialize( result = to_dollar_string(result) return result - - -class ProviderFunctionCallRule(FunctionCallRule): - """Rule for provider-namespaced function calls.""" - - _children_layout: Tuple[ - IdentifierRule, - IdentifierRule, - IdentifierRule, - LPAR, - Optional[NewLineOrCommentRule], - Optional[ArgumentsRule], - Optional[NewLineOrCommentRule], - RPAR, - ] - - @staticmethod - def lark_name() -> str: - """Return the grammar rule name.""" - return "provider_function_call" diff --git a/hcl2/rules/strings.py b/hcl2/rules/strings.py index 0303adfb..2a19a0f9 100644 --- a/hcl2/rules/strings.py +++ b/hcl2/rules/strings.py @@ -143,7 +143,7 @@ class HeredocTrimTemplateRule(HeredocTemplateRule): @staticmethod def lark_name() -> str: """Return the grammar rule name.""" - return "heredoc_trim_template" + return "heredoc_template_trim" def serialize( self, options=SerializationOptions(), context=SerializationContext() diff --git a/test/unit/cli/test_hcl_to_json.py b/test/unit/cli/test_hcl_to_json.py index 5c7e6f3a..4954d09c 100644 --- a/test/unit/cli/test_hcl_to_json.py +++ b/test/unit/cli/test_hcl_to_json.py @@ -215,10 +215,8 @@ def test_skip_error_with_output_file(self): with patch("sys.argv", ["hcl2tojson", "-s", in_path, out_path]): main() - # The output file is created (opened for writing) before - # conversion; on a skipped error it will be empty. - self.assertTrue(os.path.exists(out_path)) - self.assertEqual(_read_file(out_path), "") + # The partial output file is cleaned up on skipped errors. 
+ self.assertFalse(os.path.exists(out_path)) def test_raise_error_with_output_file(self): with tempfile.TemporaryDirectory() as tmpdir: diff --git a/test/unit/rules/test_functions.py b/test/unit/rules/test_functions.py index 18a763bd..6d3146c0 100644 --- a/test/unit/rules/test_functions.py +++ b/test/unit/rules/test_functions.py @@ -5,7 +5,6 @@ from hcl2.rules.functions import ( ArgumentsRule, FunctionCallRule, - ProviderFunctionCallRule, ) from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.tokens import NAME, COMMA, ELLIPSIS, LPAR, RPAR, StringToken @@ -145,39 +144,3 @@ def test_arguments_with_colons_tokens(self): rule = FunctionCallRule(children) self.assertIsNotNone(rule.arguments) self.assertEqual(rule.serialize(), "${provider::func::aa(5)}") - - -# --- ProviderFunctionCallRule tests --- - - -class TestProviderFunctionCallRule(TestCase): - def test_lark_name(self): - self.assertEqual(ProviderFunctionCallRule.lark_name(), "provider_function_call") - - def test_inherits_function_call_rule(self): - self.assertTrue(issubclass(ProviderFunctionCallRule, FunctionCallRule)) - - def test_serialize_provider_function(self): - children = [ - _make_identifier("ns"), - _make_identifier("mod"), - _make_identifier("func"), - LPAR(), - _make_arguments(["a"]), - RPAR(), - ] - rule = ProviderFunctionCallRule(children) - self.assertEqual(rule.serialize(), "${ns::mod::func(a)}") - - def test_serialize_inside_dollar_string(self): - children = [ - _make_identifier("ns"), - _make_identifier("mod"), - _make_identifier("func"), - LPAR(), - _make_arguments(["a"]), - RPAR(), - ] - rule = ProviderFunctionCallRule(children) - ctx = SerializationContext(inside_dollar_string=True) - self.assertEqual(rule.serialize(context=ctx), "ns::mod::func(a)") diff --git a/test/unit/rules/test_strings.py b/test/unit/rules/test_strings.py index 9abf7eb6..b037d997 100644 --- a/test/unit/rules/test_strings.py +++ b/test/unit/rules/test_strings.py @@ -243,7 +243,7 @@ def 
test_serialize_no_preserve_invalid_raises(self): class TestHeredocTrimTemplateRule(TestCase): def test_lark_name(self): - self.assertEqual(HeredocTrimTemplateRule.lark_name(), "heredoc_trim_template") + self.assertEqual(HeredocTrimTemplateRule.lark_name(), "heredoc_template_trim") def test_serialize_preserve_heredocs_trims_indent(self): token = HEREDOC_TRIM_TEMPLATE("<<-EOF\n line1\n line2\nEOF") diff --git a/test/unit/test_deserializer.py b/test/unit/test_deserializer.py index 54ecfa1a..5ec25fe9 100644 --- a/test/unit/test_deserializer.py +++ b/test/unit/test_deserializer.py @@ -1,7 +1,7 @@ # pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase -from hcl2.const import IS_BLOCK, COMMENTS_KEY +from hcl2.const import IS_BLOCK, COMMENTS_KEY, INLINE_COMMENTS_KEY from hcl2.deserializer import BaseDeserializer, DeserializerOptions from hcl2.rules.base import StartRule, BodyRule, BlockRule, AttributeRule from hcl2.rules.containers import ( @@ -467,6 +467,10 @@ def test_is_reserved_key_comments(self): d = _deser() self.assertTrue(d._is_reserved_key(COMMENTS_KEY)) + def test_is_reserved_key_inline_comments(self): + d = _deser() + self.assertTrue(d._is_reserved_key(INLINE_COMMENTS_KEY)) + def test_is_reserved_key_normal_key(self): d = _deser() self.assertFalse(d._is_reserved_key("name")) From 5ce94f810d09d1ffb031d3b80216948a4c4b726d Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 8 Mar 2026 00:46:43 +0100 Subject: [PATCH 44/45] increase minimum test coverage --- .coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index 89ef860b..6e581776 100644 --- a/.coveragerc +++ b/.coveragerc @@ -10,6 +10,6 @@ omit = [report] show_missing = true -fail_under = 90 +fail_under = 95 exclude_lines = raise NotImplementedError From 4af1b0f36e92e1d82a8c508b4bed6995792c5db2 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 9 Mar 2026 12:22:02 +0100 Subject: [PATCH 45/45] update CLAUDE.md --- CLAUDE.md | 277 
+++++++++++++++++------------------------------------- 1 file changed, 86 insertions(+), 191 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 8d5260ab..e09f58c2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,226 +1,121 @@ -# HCL2 Parser Development Guidelines +# HCL2 Parser — CLAUDE.md -When working with this HCL2 parser codebase, follow these architectural principles and patterns. - -## Core Architecture Rules - -**ALWAYS** understand the bidirectional pipeline: +## Pipeline ``` Forward: HCL2 Text → Lark Parse Tree → LarkElement Tree → Python Dict/JSON Reverse: Python Dict/JSON → LarkElement Tree → Lark Tree → HCL2 Text ``` -**NEVER** bypass the LarkElement intermediate representation. It provides type safety and enables bidirectional transformations. - -**REMEMBER** that separation of concerns is key: - -- Grammar definition (`hcl2.lark`) — syntax rules -- Transformer (`transformer.py`) — Lark parse tree → LarkElement tree -- Serialization (`rules/*.serialize()`) — LarkElement tree → Python dict -- Deserializer (`deserializer.py`) — Python dict → LarkElement tree -- Formatter (`formatter.py`) — whitespace alignment and spacing on LarkElement trees -- Reconstructor (`reconstructor.py`) — LarkElement tree → HCL2 text via Lark - -### Public API Design - -**FOLLOW** the `json` module convention in `api.py`: +## Module Map + +| Module | Role | +|---|---| +| `hcl2/hcl2.lark` | Lark grammar definition | +| `hcl2/api.py` | Public API (`load/loads/dump/dumps` + intermediate stages) | +| `hcl2/parser.py` | Lark parser factory with caching | +| `hcl2/transformer.py` | Lark parse tree → LarkElement tree | +| `hcl2/deserializer.py` | Python dict → LarkElement tree | +| `hcl2/formatter.py` | Whitespace alignment and spacing on LarkElement trees | +| `hcl2/reconstructor.py` | LarkElement tree → HCL2 text via Lark | +| `hcl2/builder.py` | Programmatic HCL document construction | +| `hcl2/utils.py` | `SerializationOptions`, `SerializationContext`, string helpers | +| 
`hcl2/const.py` | Constants: `IS_BLOCK`, `COMMENTS_KEY`, `INLINE_COMMENTS_KEY` | +| `cli/helpers.py` | File/directory/stdin conversion helpers | +| `cli/hcl_to_json.py` | `hcl2tojson` entry point | +| `cli/json_to_hcl.py` | `jsontohcl2` entry point | + +`hcl2/__main__.py` is a thin wrapper that imports `cli.hcl_to_json:main`. + +### Rules (one class per grammar rule) + +| File | Domain | +|---|---| +| `rules/abstract.py` | `LarkElement`, `LarkRule`, `LarkToken` base classes | +| `rules/tokens.py` | `StringToken` (cached factory), `StaticStringToken`, punctuation constants | +| `rules/base.py` | `StartRule`, `BodyRule`, `BlockRule`, `AttributeRule` | +| `rules/containers.py` | `TupleRule`, `ObjectRule`, `ObjectElemRule`, `ObjectElemKeyRule` | +| `rules/expressions.py` | `ExprTermRule`, `BinaryOpRule`, `UnaryOpRule`, `ConditionalRule` | +| `rules/literal_rules.py` | `IntLitRule`, `FloatLitRule`, `IdentifierRule`, `KeywordRule` | +| `rules/strings.py` | `StringRule`, `InterpolationRule`, `HeredocTemplateRule` | +| `rules/functions.py` | `FunctionCallRule`, `ArgumentsRule` | +| `rules/indexing.py` | `GetAttrRule`, `SqbIndexRule`, splat rules | +| `rules/for_expressions.py` | `ForTupleExprRule`, `ForObjectExprRule`, `ForIntroRule`, `ForCondRule` | +| `rules/whitespace.py` | `NewLineOrCommentRule`, `InlineCommentMixIn` | + +## Public API (`api.py`) + +Follows the `json` module convention. All option parameters are keyword-only. - `load/loads` — HCL2 text → Python dict - `dump/dumps` — Python dict → HCL2 text -- Intermediate stages for advanced usage: `parse/parses`, `parse_to_tree/parses_to_tree`, `transform`, `serialize`, `from_dict`, `from_json`, `reconstruct` -- All option parameters are keyword-only - -## Design Pattern Guidelines - -### Rule-Based Transformation Pattern - -**FOLLOW** the one-to-one mapping: each Lark grammar rule corresponds to exactly one `LarkRule` class. 
- -**ENSURE** every rule class: - -- Mirrors lark grammar definition -- Inherits from appropriate base class (`LarkRule` or `LarkToken`) -- Implements `lark_name()` returning the grammar rule name -- Provides typed property accessors for child elements -- Handles its own serialization logic via `serialize()` -- Defines `_children` static field with appropriate type hinting - -**LOCATE** transformation logic in `hcl2/transformer.py` - -### Type Safety Requirements - -**USE** abstract base classes from `hcl2/rules/abstract.py` to define contracts. - -**PROVIDE** comprehensive type hints for all rule children structures. - -**LEVERAGE** the generic token system in `hcl2/rules/tokens.py` for dynamic token creation with caching. - -### Modular Organization Rules +- Intermediate stages: `parse/parses`, `parse_to_tree/parses_to_tree`, `transform`, `serialize`, `from_dict`, `from_json`, `reconstruct` -**ORGANIZE** rules by domain responsibility: +### Option Dataclasses -- **Structural rules** → `rules/base.py` -- **Container rules** → `rules/containers.py` -- **Expression rules** → `rules/expressions.py` -- **Literal rules** → `rules/literal_rules.py` -- **String rules** → `rules/strings.py` -- **Function rules** → `rules/functions.py` -- **Indexing rules** → `rules/indexing.py` -- **For-expression rules** → `rules/for_expressions.py` -- **Metadata rules** → `rules/whitespace.py` +**`SerializationOptions`** (LarkElement → dict): +`with_comments`, `with_meta`, `wrap_objects`, `wrap_tuples`, `explicit_blocks`, `preserve_heredocs`, `force_operation_parentheses`, `preserve_scientific_notation` -**NEVER** mix concerns across these domains. 
+**`DeserializerOptions`** (dict → LarkElement): +`heredocs_to_strings`, `strings_to_heredocs`, `object_elements_colon`, `object_elements_trailing_comma` -### Serialization Strategy Guidelines +**`FormatterOptions`** (whitespace/alignment): +`indent_length`, `open_empty_blocks`, `open_empty_objects`, `open_empty_tuples`, `vertically_align_attributes`, `vertically_align_object_elements` -**IMPLEMENT** context-aware serialization using: +## CLI -- `SerializationOptions` for configuration -- `SerializationContext` for state tracking -- Context managers for temporary state changes +Console scripts defined in `pyproject.toml`. Each uses argparse flags that map directly to the option dataclass fields above. -**REFERENCE** implementation patterns in `hcl2/utils.py` - -**ENSURE** each rule type follows its serialization strategy: - -- Structural rules create nested dictionaries -- Container rules handle collections with optional wrapping -- Expression rules generate `${...}` interpolation when needed -- Literal rules convert to appropriate Python types - -## Critical Implementation Rules - -### Block vs Object Distinction - -**ALWAYS** preserve the semantic difference between HCL2 blocks and data objects. - -**USE** `__is_block__` markers to maintain semantic intent during round-trips. - -**IMPLEMENT** block recognition logic in deserializer that can distinguish blocks from regular objects. - -**HANDLE** multi-label blocks by implementing recursive label extraction algorithms. - -### Bidirectional Requirements - -**ENSURE** every serialization operation has a corresponding deserialization counterpart. - -**TEST** round-trip integrity: Parse → Serialize → Deserialize → Serialize should produce identical results. - -**REFERENCE** deserialization patterns in `hcl2/deserializer.py` - -### String Interpolation Handling - -**SUPPORT** nested expression evaluation within `${expression}` syntax. 
+``` +hcl2tojson --json-indent 2 --with-meta file.tf +jsontohcl2 --indent 4 --no-align file.json +``` -**HANDLE** escape sequences and literal text segments properly. +Add new options as `parser.add_argument()` calls in the relevant entry point module. -**MAINTAIN** context awareness when generating interpolation strings. +## Hard Rules -## Extension Guidelines +These are project-specific constraints that must not be violated: -### Adding New Language Constructs +1. **Always use the LarkElement IR.** Never transform directly from Lark parse tree to Python dict or vice versa. +1. **Block vs object distinction.** Use `__is_block__` markers (`const.IS_BLOCK`) to preserve semantic intent during round-trips. The deserializer must distinguish blocks from regular objects. +1. **Bidirectional completeness.** Every serialization path must have a corresponding deserialization path. Test round-trip integrity: Parse → Serialize → Deserialize → Serialize produces identical results. +1. **One grammar rule = one `LarkRule` class.** Each class implements `lark_name()`, typed property accessors, `serialize()`, and declares `_children_layout: Tuple[...]` (annotation only, no assignment) to document child structure. +1. **Token caching.** Use the `StringToken` factory in `rules/tokens.py` — never create token instances directly. +1. **Interpolation context.** `${...}` generation depends on nesting depth — always pass and respect `SerializationContext`. +1. **Update both directions.** When adding language features, update transformer.py, deserializer.py, formatter.py and reconstructor.py. -**FOLLOW** this exact sequence: +## Adding a New Language Construct 1. Add grammar rules to `hcl2.lark` -1. Create rule classes following existing patterns -1. Add transformer methods to map grammar to rules -1. Implement serialization logic in rule classes -1. Update deserializer for round-trip support - -### Rule Implementation Conventions +1. 
Create rule class(es) in the appropriate `rules/` file +1. Add transformer method(s) in `transformer.py` +1. Implement `serialize()` in the rule class +1. Update `deserializer.py`, `formatter.py` and `reconstructor.py` for round-trip support -**ALWAYS** implement these methods/properties: +## Testing -- `lark_name()` static method -- Property accessors for child elements -- `serialize()` method with context support -- Type hints for `_children` structure +Framework: `unittest.TestCase` (not pytest). -**FOLLOW** naming conventions consistent with existing rules. - -### Testing Requirements - -**USE** `unittest.TestCase` as the test framework (not pytest). - -**ORGANIZE** tests into two directories: - -- `test/unit/` — granular tests that instantiate rule objects directly (no parsing) - - `test/unit/rules/` — one file per rules module (e.g., `test_expressions.py` covers `hcl2/rules/expressions.py`) - - `test/unit/test_api.py`, `test/unit/test_builder.py`, etc. — other module tests -- `test/integration/` — full-pipeline tests using golden files - - `test_round_trip.py` — suite-based step tests (HCL→JSON, JSON→JSON, JSON→HCL, full round-trip) that iterate over all suites in `hcl2_original/` - - `test_specialized.py` — feature-specific integration tests (operator precedence, Builder round-trip) with golden files in `specialized/` - -**USE** concrete stubs when testing ABCs (e.g., `StubExpression(ExpressionRule)` for testing `_wrap_into_parentheses` logic without the parser). - -**RUN** tests with: `python -m unittest discover -s test -p "test_*.py" -v` - -## Code Quality Rules - -### Type Safety Requirements - -**PROVIDE** full type hints to enable static analysis. - -**USE** proper inheritance hierarchies to catch errors at runtime. - -**IMPLEMENT** property-based access to prevent structural errors. - -### Performance Considerations - -**LEVERAGE** cached token creation to prevent duplicate instantiation. 
- -**IMPLEMENT** lazy evaluation for context-sensitive processing. - -**OPTIMIZE** tree traversal using parent-child references. - -### Maintainability Standards - -**ENSURE** each rule has single responsibility for one grammar construct. - -**FOLLOW** open/closed principle: extend via new rules, don't modify existing ones. - -**MAINTAIN** clear import dependencies and type relationships. - -## File Organization Standards - -**KEEP** core abstractions in `rules/abstract.py` - -**GROUP** domain-specific rules by functionality in separate files - -**SEPARATE** utility functions into dedicated modules - -**MAINTAIN** grammar definition independence from implementation - -**STRUCTURE** test infrastructure to support incremental validation - -## Common Pitfalls to Avoid - -**DO NOT** create direct transformations from parse tree to Python dict - always use LarkElement intermediate representation. - -**DO NOT** mix serialization concerns across rule types - each rule handles its own format. - -**DO NOT** ignore context when generating expressions - interpolation behavior depends on nesting. - -**DO NOT** forget to update both serialization and deserialization when adding new constructs. - -**DO NOT** bypass the factory pattern for token creation - use the cached `StringToken` system. - -## When Making Changes +``` +python -m unittest discover -s test -p "test_*.py" -v +``` -**ALWAYS** run round-trip tests after any modifications. +**Unit tests** (`test/unit/`): instantiate rule objects directly (no parsing). -**VERIFY** that new rules follow existing patterns and conventions. +- `test/unit/rules/` — one file per rules module +- `test/unit/cli/` — one file per CLI module +- `test/unit/test_api.py`, `test_builder.py`, `test_deserializer.py`, `test_formatter.py`, `test_reconstructor.py`, `test_utils.py` -**UPDATE** both transformer and deserializer when adding language features. +Use concrete stubs when testing ABCs (e.g., `StubExpression(ExpressionRule)`). 
-**MAINTAIN** type safety and proper inheritance relationships. +**Integration tests** (`test/integration/`): full-pipeline tests with golden files. -**DOCUMENT** any new patterns or conventions introduced. +- `test_round_trip.py` — iterates over all suites in `hcl2_original/`, tests HCL→JSON, JSON→JSON, JSON→HCL, and full round-trip +- `test_specialized.py` — feature-specific tests with golden files in `specialized/` -This architecture enables robust HCL2 parsing with full round-trip fidelity while maintaining code quality and extensibility. +Always run the full round-trip test suite after any modification. -## Keeping This File Current +## Keeping Docs Current -**PROACTIVELY** update this file when your work changes the architecture, file organization, module responsibilities, public API surface, or testing conventions described above. If you add, rename, move, or delete modules, rules files, test directories, or pipeline stages — reflect those changes here before finishing the task. Stale documentation is worse than no documentation. +Update this file when architecture, modules, API surface, or testing conventions change. Also update `README.md` and `docs/usage.md` when changes affect the public API, CLI flags, or option fields.