From b8be80cb8b275ce440cfc960dfa0fb13e3192f90 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 22 Mar 2025 00:48:41 +0100 Subject: [PATCH 01/42] transformer overhaul --- hcl2/api.py | 2 +- hcl2/{transformer.py => dict_transformer.py} | 4 + hcl2/rule_transformer.py | 101 ++++ hcl2/serialization.py | 496 +++++++++++++++++++ test/helpers/hcl2_helper.py | 2 +- test/unit/test_dict_transformer.py | 2 +- 6 files changed, 604 insertions(+), 3 deletions(-) rename hcl2/{transformer.py => dict_transformer.py} (99%) create mode 100644 hcl2/rule_transformer.py create mode 100644 hcl2/serialization.py diff --git a/hcl2/api.py b/hcl2/api.py index 399ba929..1cec02a2 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -3,7 +3,7 @@ from lark.tree import Tree from hcl2.parser import parser, reconstruction_parser -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer from hcl2.reconstructor import HCLReconstructor, HCLReverseTransformer diff --git a/hcl2/transformer.py b/hcl2/dict_transformer.py similarity index 99% rename from hcl2/transformer.py rename to hcl2/dict_transformer.py index 382092d6..64c58bcb 100644 --- a/hcl2/transformer.py +++ b/hcl2/dict_transformer.py @@ -277,6 +277,10 @@ def heredoc_template_trim(self, args: List) -> str: def new_line_or_comment(self, args: List) -> _DiscardType: return Discard + # def EQ(self, args: List): + # print("EQ", args) + # return args + def for_tuple_expr(self, args: List) -> str: args = self.strip_new_line_tokens(args) for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) diff --git a/hcl2/rule_transformer.py b/hcl2/rule_transformer.py new file mode 100644 index 00000000..8f0b922a --- /dev/null +++ b/hcl2/rule_transformer.py @@ -0,0 +1,101 @@ +# pylint: disable=missing-function-docstring,unused-argument +from typing import List, Union + +from lark import Transformer, Tree, Token +from lark.visitors import _Leaf_T, _Return_T, Discard + +from hcl2.serialization import ( + LarkRule, + 
LarkToken, + StartRule, + BodyRule, + BlockRule, + IdentifierRule, + IntLitRule, + FloatLitRule, + StringLitRule, + ExprTermRule, + ConditionalRule, + BinaryOpRule, + BinaryOperatorRule, + BinaryTermRule, + UnaryOpRule, + AttributeRule, + NewLineOrCommentRule, +) + +ArgsType = List[Union[Token, Tree]] + + +class RuleTransformer(Transformer): + """Takes a syntax tree generated by the parser and + transforms it to a tree of LarkRule instances + """ + + with_meta: bool + + @staticmethod + def is_type_keyword(value: str) -> bool: + return value in {"bool", "number", "string"} + + def __init__(self, with_meta: bool = False, with_comments: bool = True): + """ + :param with_meta: If set to true then adds `__start_line__` and `__end_line__` + parameters to the output dict. Default to false. + """ + self._with_meta = with_meta + self._with_comments = with_comments + super().__init__() + + def start(self, args: ArgsType) -> StartRule: + return StartRule(args) + + def body(self, args: ArgsType) -> BodyRule: + return BodyRule(args) + + def block(self, args: ArgsType) -> BlockRule: + return BlockRule(args) + + def identifier(self, args: ArgsType) -> IdentifierRule: + return IdentifierRule(args) + + def int_lit(self, args: ArgsType) -> IntLitRule: + return IntLitRule(args) + + def float_lit(self, args: ArgsType) -> FloatLitRule: + return FloatLitRule(args) + + def string_lit(self, args: ArgsType) -> StringLitRule: + return StringLitRule(args) + + def expr_term(self, args: ArgsType) -> ExprTermRule: + return ExprTermRule(args) + + def conditional(self, args: ArgsType) -> ConditionalRule: + return ConditionalRule(args) + + def binary_operator(self, args: ArgsType) -> BinaryOperatorRule: + return BinaryOperatorRule(args) + + def binary_term(self, args: ArgsType) -> BinaryTermRule: + return BinaryTermRule(args) + + def unary_op(self, args: ArgsType) -> UnaryOpRule: + return UnaryOpRule(args) + + def binary_op(self, args: ArgsType) -> BinaryOpRule: + return BinaryOpRule(args) + + def 
attribute(self, args: ArgsType) -> AttributeRule: + return AttributeRule(args) + + def new_line_or_comment(self, args: ArgsType) -> NewLineOrCommentRule: + if self._with_comments: + return NewLineOrCommentRule(args) + return Discard + + def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: + return super().transform(tree) + + def __default_token__(self, token: Token) -> LarkToken: + return LarkToken(token.type, token.value) diff --git a/hcl2/serialization.py b/hcl2/serialization.py new file mode 100644 index 00000000..15d75caa --- /dev/null +++ b/hcl2/serialization.py @@ -0,0 +1,496 @@ +from abc import ABC, abstractmethod +from json import JSONEncoder +from typing import List, Any, Union, Tuple, Optional + +from lark import Tree, Token + +ArgsType = List["LarkElement"] + + +def is_dollar_string(value: str) -> bool: + return value.startswith("${") and value.endswith("}") + + +def to_dollar_string(value: str) -> str: + if not is_dollar_string(value): + return f"${{{value}}}" + return value + + +def unwrap_dollar_string(value: str) -> str: + if is_dollar_string(value): + return value[2:-1] + return value + + +def wrap_into_parentheses(value: str) -> str: + if is_dollar_string(value): + value = unwrap_dollar_string(value) + return to_dollar_string(f"({value})") + + return f"({value})" + + +class LarkEncoder(JSONEncoder): + def default(self, obj: Any): + if isinstance(obj, LarkRule): + return obj.serialize() + else: + return super().default(obj) + + +class LarkElement(ABC): + @abstractmethod + def tree(self) -> Token: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + +class LarkToken(LarkElement): + def __init__(self, name: str, value: Union[str, int]): + self._name = name + self._value = value + + @property + def name(self) -> str: + return self._name + + @property + def value(self): + return self._value + + def serialize(self) -> Any: + return self._value + + def tree(self) -> Token: + return 
Token(self.name, self.value) + + def __str__(self) -> str: + return str(self._value) + + def __repr__(self) -> str: + return f"" + + +EQ_Token = LarkToken + + +class TokenSequence: + def __init__(self, tokens: List[LarkToken]): + self.tokens = tokens + + def tree(self) -> List[Token]: + return [token.tree() for token in self.tokens] + + def joined(self): + return "".join(str(token) for token in self.tokens) + + +class LarkRule(ABC): + _classes = [] + + @staticmethod + @abstractmethod + def rule_name() -> str: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + def tree(self) -> Tree: + result_children = [] + for child in self._children: + if child is None: + continue + + if isinstance(child, TokenSequence): + result_children.extend(child.tree()) + else: + result_children.append(child.tree()) + + return Tree(self.rule_name(), result_children) + + def __init__(self, children): + self._children: List[LarkElement] = children + + def __init_subclass__(cls, **kwargs): + cls._classes.append(cls) + + def __repr__(self): + return f"" + + +class StartRule(LarkRule): + + _children: Tuple["BodyRule"] + + @staticmethod + def rule_name() -> str: + return "start" + + @property + def body(self) -> "BodyRule": + return self._children[0] + + def serialize(self) -> Any: + return self.body.serialize() + + +class BodyRule(LarkRule): + + _children: List[ + Union[ + "NewLineOrCommentRule", + "AttributeRule", + "BlockRule", + ] + ] + + @staticmethod + def rule_name() -> str: + return "body" + + def serialize(self) -> Any: + blocks: List[BlockRule] = [] + attributes: List[AttributeRule] = [] + comments = [] + + for child in self._children: + if isinstance(child, BlockRule): + blocks.append(child) + if isinstance(child, AttributeRule): + attributes.append(child) + if isinstance(child, NewLineOrCommentRule): + child_comments = child.actual_comments() + if child_comments: + comments.extend(child_comments) + + result = {} + + for 
attribute in attributes: + result.update( + {attribute.identifier.serialize(): attribute.expression.serialize()} + ) + + result.update( + {block.labels[0].serialize(): block.serialize() for block in blocks} + ) + + if comments: + result["__comments__"] = comments + + return result + + +class BlockRule(LarkRule): + @staticmethod + def rule_name() -> str: + return "block" + + def __init__(self, children): + super().__init__(children) + *self._labels, self._body = children + + @property + def labels(self) -> List["IdentifierRule"]: + return list(filter(lambda label: label is not None, self._labels)) + + @property + def body(self) -> BodyRule: + return self._body + + def serialize(self) -> BodyRule: + result = self._body.serialize() + labels = self._labels + for label in reversed(labels[1:]): + result = {label.serialize(): result} + return result + + +class IdentifierRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "identifier" + + def __init__(self, children): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class IntLitRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "int_lit" + + def __init__(self, children): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class FloatLitRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "float_lit" + + def __init__(self, children): + print("float_lit", children) + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class StringLitRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "STRING_LIT" + + def serialize(self) -> Any: + return 
TokenSequence(self._children).joined()[1:-1] + + +class Expression(LarkRule, ABC): + @staticmethod + def rule_name() -> str: + return "expression" + + +class ExprTermRule(Expression): + @staticmethod + def rule_name() -> str: + return "expr_term" + + def __init__(self, children): + self._parentheses = False + if ( + isinstance(children[0], LarkToken) + and children[0].name == "LPAR" + and isinstance(children[-1], LarkToken) + and children[-1].name == "RPAR" + ): + self._parentheses = True + children = children[1:-1] + super().__init__(children) + + @property + def parentheses(self) -> bool: + return self._parentheses + + def serialize(self) -> Any: + result = self._children[0].serialize() + if self._parentheses: + result = wrap_into_parentheses(result) + result = to_dollar_string(result) + return result + + def tree(self) -> Tree: + tree = super().tree() + if self.parentheses: + return Tree( + tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] + ) + return tree + + +class ConditionalRule(ExprTermRule): + + _children: Tuple[ + Expression, + Expression, + Expression, + ] + + @staticmethod + def rule_name(): + return "conditional" + + @property + def condition(self) -> Expression: + return self._children[0] + + @property + def if_true(self) -> Expression: + return self._children[1] + + @property + def if_false(self) -> Expression: + return self._children[2] + + def __init__(self, children): + super().__init__(children) + + def serialize(self) -> Any: + result = f"{self.condition.serialize()} ? 
{self.if_true.serialize()} : {self.if_false.serialize()}" + return to_dollar_string(result) + + +class BinaryOperatorRule(LarkRule): + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "binary_operator" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + +class BinaryTermRule(LarkRule): + _children: Tuple[ + BinaryOperatorRule, + Optional["NewLineOrCommentRule"], + ExprTermRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_term" + + def __init__(self, children): + if len(children) == 2: + children.insert(1, None) + super().__init__(children) + + @property + def binary_operator(self) -> BinaryOperatorRule: + return self._children[0] + + @property + def comment(self) -> Optional["NewLineOrCommentRule"]: + return self._children[1] + + @property + def has_comment(self) -> bool: + return self.comment is not None + + @property + def expr_term(self) -> ExprTermRule: + return self._children[2] + + def serialize(self) -> Any: + return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + + +class UnaryOpRule(Expression): + _children: Tuple[LarkToken, ExprTermRule] + + @staticmethod + def rule_name() -> str: + return "unary_op" + + @property + def unary_operator(self) -> str: + return str(self._children[0]) + + @property + def expr_term(self): + return self._children[1] + + def serialize(self) -> Any: + return to_dollar_string(f"{self.unary_operator}{self.expr_term.serialize()}") + + +class BinaryOpRule(Expression): + _children: Tuple[ + ExprTermRule, + BinaryTermRule, + "NewLineOrCommentRule", + ] + + @staticmethod + def rule_name() -> str: + return "binary_op" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def binary_term(self) -> BinaryTermRule: + return self._children[1] + + def serialize(self) -> Any: + lhs = self.expr_term.serialize() + operator = self.binary_term.binary_operator.serialize() + rhs = 
self.binary_term.expr_term.serialize() + rhs = unwrap_dollar_string(rhs) + return to_dollar_string(f"{lhs} {operator} {rhs}") + + +class AttributeRule(LarkRule): + _children: Tuple[ + IdentifierRule, + EQ_Token, + Expression, + ] + + @staticmethod + def rule_name() -> str: + return "attribute" + + @property + def identifier(self) -> IdentifierRule: + return self._children[0] + + @property + def expression(self) -> Expression: + return self._children[2] + + def serialize(self) -> Any: + return {self.identifier.serialize(): self.expression.serialize()} + + +class NewLineOrCommentRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "new_line_or_comment" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + def actual_comments(self) -> Optional[List[str]]: + comment = self.serialize() + if comment == "\n": + return None + + comment = comment.strip() + comments = comment.split("\n") + + result = [] + for comment in comments: + if comment.startswith("//"): + comment = comment[2:] + + elif comment.startswith("#"): + comment = comment[1:] + + if comment != "": + result.append(comment.strip()) + + return result diff --git a/test/helpers/hcl2_helper.py b/test/helpers/hcl2_helper.py index 5acee1e7..c39ee7fb 100644 --- a/test/helpers/hcl2_helper.py +++ b/test/helpers/hcl2_helper.py @@ -3,7 +3,7 @@ from lark import Tree from hcl2.parser import parser -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer class Hcl2Helper: diff --git a/test/unit/test_dict_transformer.py b/test/unit/test_dict_transformer.py index 122332eb..baad5ba9 100644 --- a/test/unit/test_dict_transformer.py +++ b/test/unit/test_dict_transformer.py @@ -2,7 +2,7 @@ from unittest import TestCase -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer class TestDictTransformer(TestCase): From e39b42918b4f6dca5694bd836faa5ee649b8e560 Mon Sep 17 00:00:00 
2001 From: Kamil Kozik Date: Wed, 26 Mar 2025 21:28:54 +0100 Subject: [PATCH 02/42] reorganize code --- hcl2/rule_transformer.py | 101 ---- hcl2/rule_transformer/__init__.py | 0 hcl2/rule_transformer/json.py | 12 + hcl2/rule_transformer/rules/__init__.py | 0 hcl2/rule_transformer/rules/abstract.py | 93 ++++ hcl2/rule_transformer/rules/base.py | 122 +++++ hcl2/rule_transformer/rules/expression.py | 187 +++++++ hcl2/rule_transformer/rules/token_sequence.py | 63 +++ hcl2/rule_transformer/rules/whitespace.py | 36 ++ hcl2/rule_transformer/transformer.py | 118 +++++ hcl2/rule_transformer/utils.py | 23 + hcl2/serialization.py | 496 ------------------ 12 files changed, 654 insertions(+), 597 deletions(-) delete mode 100644 hcl2/rule_transformer.py create mode 100644 hcl2/rule_transformer/__init__.py create mode 100644 hcl2/rule_transformer/json.py create mode 100644 hcl2/rule_transformer/rules/__init__.py create mode 100644 hcl2/rule_transformer/rules/abstract.py create mode 100644 hcl2/rule_transformer/rules/base.py create mode 100644 hcl2/rule_transformer/rules/expression.py create mode 100644 hcl2/rule_transformer/rules/token_sequence.py create mode 100644 hcl2/rule_transformer/rules/whitespace.py create mode 100644 hcl2/rule_transformer/transformer.py create mode 100644 hcl2/rule_transformer/utils.py delete mode 100644 hcl2/serialization.py diff --git a/hcl2/rule_transformer.py b/hcl2/rule_transformer.py deleted file mode 100644 index 8f0b922a..00000000 --- a/hcl2/rule_transformer.py +++ /dev/null @@ -1,101 +0,0 @@ -# pylint: disable=missing-function-docstring,unused-argument -from typing import List, Union - -from lark import Transformer, Tree, Token -from lark.visitors import _Leaf_T, _Return_T, Discard - -from hcl2.serialization import ( - LarkRule, - LarkToken, - StartRule, - BodyRule, - BlockRule, - IdentifierRule, - IntLitRule, - FloatLitRule, - StringLitRule, - ExprTermRule, - ConditionalRule, - BinaryOpRule, - BinaryOperatorRule, - BinaryTermRule, - 
UnaryOpRule, - AttributeRule, - NewLineOrCommentRule, -) - -ArgsType = List[Union[Token, Tree]] - - -class RuleTransformer(Transformer): - """Takes a syntax tree generated by the parser and - transforms it to a tree of LarkRule instances - """ - - with_meta: bool - - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} - - def __init__(self, with_meta: bool = False, with_comments: bool = True): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self._with_meta = with_meta - self._with_comments = with_comments - super().__init__() - - def start(self, args: ArgsType) -> StartRule: - return StartRule(args) - - def body(self, args: ArgsType) -> BodyRule: - return BodyRule(args) - - def block(self, args: ArgsType) -> BlockRule: - return BlockRule(args) - - def identifier(self, args: ArgsType) -> IdentifierRule: - return IdentifierRule(args) - - def int_lit(self, args: ArgsType) -> IntLitRule: - return IntLitRule(args) - - def float_lit(self, args: ArgsType) -> FloatLitRule: - return FloatLitRule(args) - - def string_lit(self, args: ArgsType) -> StringLitRule: - return StringLitRule(args) - - def expr_term(self, args: ArgsType) -> ExprTermRule: - return ExprTermRule(args) - - def conditional(self, args: ArgsType) -> ConditionalRule: - return ConditionalRule(args) - - def binary_operator(self, args: ArgsType) -> BinaryOperatorRule: - return BinaryOperatorRule(args) - - def binary_term(self, args: ArgsType) -> BinaryTermRule: - return BinaryTermRule(args) - - def unary_op(self, args: ArgsType) -> UnaryOpRule: - return UnaryOpRule(args) - - def binary_op(self, args: ArgsType) -> BinaryOpRule: - return BinaryOpRule(args) - - def attribute(self, args: ArgsType) -> AttributeRule: - return AttributeRule(args) - - def new_line_or_comment(self, args: ArgsType) -> NewLineOrCommentRule: - if self._with_comments: - return 
NewLineOrCommentRule(args) - return Discard - - def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: - return super().transform(tree) - - def __default_token__(self, token: Token) -> LarkToken: - return LarkToken(token.type, token.value) diff --git a/hcl2/rule_transformer/__init__.py b/hcl2/rule_transformer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hcl2/rule_transformer/json.py b/hcl2/rule_transformer/json.py new file mode 100644 index 00000000..647b6683 --- /dev/null +++ b/hcl2/rule_transformer/json.py @@ -0,0 +1,12 @@ +from json import JSONEncoder +from typing import Any + +from hcl2.rule_transformer.rules.abstract import LarkRule + + +class LarkEncoder(JSONEncoder): + def default(self, obj: Any): + if isinstance(obj, LarkRule): + return obj.serialize() + else: + return super().default(obj) diff --git a/hcl2/rule_transformer/rules/__init__.py b/hcl2/rule_transformer/rules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py new file mode 100644 index 00000000..37f63a03 --- /dev/null +++ b/hcl2/rule_transformer/rules/abstract.py @@ -0,0 +1,93 @@ +from abc import ABC, abstractmethod +from typing import Any, Union, List, Optional + +from lark import Token, Tree +from lark.tree import Meta + + +class LarkElement(ABC): + @abstractmethod + def tree(self) -> Token: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + +class LarkToken(LarkElement): + def __init__(self, name: str, value: Union[str, int]): + self._name = name + self._value = value + + @property + def name(self) -> str: + return self._name + + @property + def value(self): + return self._value + + def serialize(self) -> Any: + return self._value + + def tree(self) -> Token: + return Token(self.name, self.value) + + def __str__(self) -> str: + return str(self._value) + + def __repr__(self) -> str: + return f"" + + 
+EQ_Token = LarkToken +COLON_TOKEN = LarkToken +LPAR_TOKEN = LarkToken # left parenthesis +RPAR_TOKEN = LarkToken # right parenthesis + + +class TokenSequence: + def __init__(self, tokens: List[LarkToken]): + self.tokens = tokens + + def tree(self) -> List[Token]: + return [token.tree() for token in self.tokens] + + def joined(self): + return "".join(str(token) for token in self.tokens) + + +class LarkRule(ABC): + @staticmethod + @abstractmethod + def rule_name() -> str: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + @property + def children(self) -> List[LarkElement]: + return self._children + + def tree(self) -> Tree: + result_children = [] + for child in self._children: + if child is None: + continue + + if isinstance(child, TokenSequence): + result_children.extend(child.tree()) + else: + result_children.append(child.tree()) + + return Tree(self.rule_name(), result_children) + + def __init__(self, children, meta: Optional[Meta] = None): + self._children = children + self._meta = meta + + def __repr__(self): + return f"" diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py new file mode 100644 index 00000000..f46d8039 --- /dev/null +++ b/hcl2/rule_transformer/rules/base.py @@ -0,0 +1,122 @@ +from typing import Tuple, Any, List, Union, Optional + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule, EQ_Token +from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.token_sequence import IdentifierRule + +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +class AttributeRule(LarkRule): + _children: Tuple[ + IdentifierRule, + EQ_Token, + Expression, + ] + + @staticmethod + def rule_name() -> str: + return "attribute" + + @property + def identifier(self) -> IdentifierRule: + return self._children[0] + + @property + def expression(self) -> Expression: + return 
self._children[2] + + def serialize(self) -> Any: + return {self.identifier.serialize(): self.expression.serialize()} + + +class BodyRule(LarkRule): + + _children: List[ + Union[ + NewLineOrCommentRule, + AttributeRule, + "BlockRule", + ] + ] + + @staticmethod + def rule_name() -> str: + return "body" + + def serialize(self) -> Any: + blocks: List[BlockRule] = [] + attributes: List[AttributeRule] = [] + comments = [] + + for child in self._children: + if isinstance(child, BlockRule): + blocks.append(child) + if isinstance(child, AttributeRule): + attributes.append(child) + if isinstance(child, NewLineOrCommentRule): + child_comments = child.actual_comments() + if child_comments: + comments.extend(child_comments) + + result = {} + + for attribute in attributes: + result.update( + {attribute.identifier.serialize(): attribute.expression.serialize()} + ) + + result.update( + {block.labels[0].serialize(): block.serialize() for block in blocks} + ) + + if comments: + result["__comments__"] = comments + + return result + + +class StartRule(LarkRule): + + _children: Tuple[BodyRule] + + @staticmethod + def rule_name() -> str: + return "start" + + @property + def body(self) -> BodyRule: + return self._children[0] + + def serialize(self) -> Any: + return self.body.serialize() + + +class BlockRule(LarkRule): + + _children: Tuple[BodyRule] + + @staticmethod + def rule_name() -> str: + return "block" + + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children) + *self._labels, self._body = children + + @property + def labels(self) -> List[IdentifierRule]: + return list(filter(lambda label: label is not None, self._labels)) + + @property + def body(self) -> BodyRule: + return self._body + + def serialize(self) -> BodyRule: + result = self._body.serialize() + labels = self._labels + for label in reversed(labels[1:]): + result = {label.serialize(): result} + return result diff --git a/hcl2/rule_transformer/rules/expression.py 
b/hcl2/rule_transformer/rules/expression.py new file mode 100644 index 00000000..2a38912a --- /dev/null +++ b/hcl2/rule_transformer/rules/expression.py @@ -0,0 +1,187 @@ +from abc import ABC +from typing import Any, Tuple, Optional, List + +from lark import Tree, Token +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import ( + LarkRule, + LarkToken, + LPAR_TOKEN, + RPAR_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rule_transformer.rules.token_sequence import BinaryOperatorRule +from hcl2.rule_transformer.utils import ( + wrap_into_parentheses, + to_dollar_string, + unwrap_dollar_string, +) + + +class Expression(LarkRule, ABC): + @staticmethod + def rule_name() -> str: + return "expression" + + +class ExprTermRule(Expression): + + _children: Tuple[ + Optional[LPAR_TOKEN], + Optional[NewLineOrCommentRule], + Expression, + Optional[NewLineOrCommentRule], + Optional[RPAR_TOKEN], + ] + + @staticmethod + def rule_name() -> str: + return "expr_term" + + def __init__(self, children, meta: Optional[Meta] = None): + self._parentheses = False + if ( + isinstance(children[0], LarkToken) + and children[0].name == "LPAR" + and isinstance(children[-1], LarkToken) + and children[-1].name == "RPAR" + ): + self._parentheses = True + children = children[1:-1] + super().__init__(children, meta) + + @property + def parentheses(self) -> bool: + return self._parentheses + + def serialize(self) -> Any: + result = self._children[0].serialize() + if self.parentheses: + result = wrap_into_parentheses(result) + result = to_dollar_string(result) + return result + + def tree(self) -> Tree: + tree = super().tree() + if self.parentheses: + return Tree( + tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] + ) + return tree + + +class ConditionalRule(LarkRule): + + _children: Tuple[ + Expression, + Expression, + Expression, + ] + + @staticmethod + def rule_name(): + return "conditional" + + @property + def 
condition(self) -> Expression: + return self._children[0] + + @property + def if_true(self) -> Expression: + return self._children[1] + + @property + def if_false(self) -> Expression: + return self._children[2] + + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children, meta) + + def serialize(self) -> Any: + result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" + return to_dollar_string(result) + + +class BinaryTermRule(LarkRule): + + _children: Tuple[ + BinaryOperatorRule, + Optional[NewLineOrCommentRule], + ExprTermRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_term" + + def __init__(self, children, meta: Optional[Meta] = None): + if len(children) == 2: + children.insert(1, None) + super().__init__(children, meta) + + @property + def binary_operator(self) -> BinaryOperatorRule: + return self._children[0] + + @property + def comment(self) -> Optional[NewLineOrCommentRule]: + return self._children[1] + + @property + def has_comment(self) -> bool: + return self.comment is not None + + @property + def expr_term(self) -> ExprTermRule: + return self._children[2] + + def serialize(self) -> Any: + return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + + +class BinaryOpRule(Expression): + _children: Tuple[ + ExprTermRule, + BinaryTermRule, + NewLineOrCommentRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_op" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def binary_term(self) -> BinaryTermRule: + return self._children[1] + + def serialize(self) -> Any: + lhs = self.expr_term.serialize() + operator = self.binary_term.binary_operator.serialize() + rhs = self.binary_term.expr_term.serialize() + rhs = unwrap_dollar_string(rhs) + return to_dollar_string(f"{lhs} {operator} {rhs}") + + +class UnaryOpRule(Expression): + + _children: Tuple[LarkToken, ExprTermRule] + + @staticmethod + def 
rule_name() -> str: + return "unary_op" + + @property + def operator(self) -> str: + return str(self._children[0]) + + @property + def expr_term(self): + return self._children[1] + + def serialize(self) -> Any: + return to_dollar_string(f"{self.operator}{self.expr_term.serialize()}") diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py new file mode 100644 index 00000000..66e22e2f --- /dev/null +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -0,0 +1,63 @@ +from abc import ABC +from typing import Tuple, Any, List, Optional + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken + + +class TokenSequenceRule(LarkRule, ABC): + + _children: Tuple[TokenSequence] + + def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class IdentifierRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "identifier" + + def serialize(self) -> str: + return str(super().serialize()) + + +class IntLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "int_lit" + + def serialize(self) -> float: + return int(super().serialize()) + + +class FloatLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "float_lit" + + def serialize(self) -> float: + return float(super().serialize()) + + +class StringLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "STRING_LIT" + + def serialize(self) -> str: + return str(super().serialize()) + + +class BinaryOperatorRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "binary_operator" + + def serialize(self) -> str: + return str(super().serialize()) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py new file mode 
100644 index 00000000..f56a386e --- /dev/null +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -0,0 +1,36 @@ +from typing import Optional, List, Any + +from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule + + +class NewLineOrCommentRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "new_line_or_comment" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + def actual_comments(self) -> Optional[List[str]]: + comment = self.serialize() + if comment == "\n": + return None + + comment = comment.strip() + comments = comment.split("\n") + + result = [] + for comment in comments: + if comment.startswith("//"): + comment = comment[2:] + + elif comment.startswith("#"): + comment = comment[1:] + + if comment != "": + result.append(comment.strip()) + + return result diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py new file mode 100644 index 00000000..9e6af6ef --- /dev/null +++ b/hcl2/rule_transformer/transformer.py @@ -0,0 +1,118 @@ +# pylint: disable=missing-function-docstring,unused-argument +from typing import List, Union + +from lark import Transformer, Tree, Token +from lark.tree import Meta +from lark.visitors import _Leaf_T, Discard, v_args + +from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule +from hcl2.rule_transformer.rules.base import ( + StartRule, + BodyRule, + BlockRule, + AttributeRule, +) +from hcl2.rule_transformer.rules.expression import ( + BinaryTermRule, + ConditionalRule, + ExprTermRule, + BinaryOpRule, + UnaryOpRule, +) +from hcl2.rule_transformer.rules.token_sequence import ( + IdentifierRule, + IntLitRule, + FloatLitRule, + StringLitRule, + BinaryOperatorRule, +) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +class RuleTransformer(Transformer): + """Takes a syntax tree generated by the parser and + transforms it to a tree of LarkRule instances + """ + + 
with_meta: bool + + @staticmethod + def is_type_keyword(value: str) -> bool: + return value in {"bool", "number", "string"} + + def __init__(self, with_meta: bool = False, with_comments: bool = True): + """ + :param with_meta: If set to true then adds `__start_line__` and `__end_line__` + parameters to the output dict. Default to false. + """ + self._with_meta = with_meta + self._with_comments = with_comments + super().__init__() + + def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: + return super().transform(tree) + + def __default_token__(self, token: Token) -> LarkToken: + return LarkToken(token.type, token.value) + + @v_args(meta=True) + def start(self, meta: Meta, args) -> StartRule: + return StartRule(args, meta) + + @v_args(meta=True) + def body(self, meta: Meta, args) -> BodyRule: + return BodyRule(args, meta) + + @v_args(meta=True) + def block(self, meta: Meta, args) -> BlockRule: + return BlockRule(args, meta) + + @v_args(meta=True) + def identifier(self, meta: Meta, args) -> IdentifierRule: + return IdentifierRule(args, meta) + + @v_args(meta=True) + def int_lit(self, meta: Meta, args) -> IntLitRule: + return IntLitRule(args, meta) + + @v_args(meta=True) + def float_lit(self, meta: Meta, args) -> FloatLitRule: + return FloatLitRule(args, meta) + + @v_args(meta=True) + def string_lit(self, meta: Meta, args) -> StringLitRule: + return StringLitRule(args, meta) + + @v_args(meta=True) + def expr_term(self, meta: Meta, args) -> ExprTermRule: + return ExprTermRule(args, meta) + + @v_args(meta=True) + def conditional(self, meta: Meta, args) -> ConditionalRule: + return ConditionalRule(args, meta) + + @v_args(meta=True) + def binary_operator(self, meta: Meta, args) -> BinaryOperatorRule: + return BinaryOperatorRule(args, meta) + + @v_args(meta=True) + def binary_term(self, meta: Meta, args) -> BinaryTermRule: + return BinaryTermRule(args, meta) + + @v_args(meta=True) + def unary_op(self, meta: Meta, args) -> UnaryOpRule: + return UnaryOpRule(args, meta) + + 
@v_args(meta=True) + def binary_op(self, meta: Meta, args) -> BinaryOpRule: + return BinaryOpRule(args, meta) + + @v_args(meta=True) + def attribute(self, meta: Meta, args) -> AttributeRule: + return AttributeRule(args, meta) + + @v_args(meta=True) + def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + if self._with_comments: + return NewLineOrCommentRule(args, meta) + return Discard diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py new file mode 100644 index 00000000..060d3b53 --- /dev/null +++ b/hcl2/rule_transformer/utils.py @@ -0,0 +1,23 @@ +def is_dollar_string(value: str) -> bool: + if not isinstance(value, str): + return False + return value.startswith("${") and value.endswith("}") + + +def to_dollar_string(value: str) -> str: + if not is_dollar_string(value): + return f"${{{value}}}" + return value + + +def unwrap_dollar_string(value: str) -> str: + if is_dollar_string(value): + return value[2:-1] + return value + + +def wrap_into_parentheses(value: str) -> str: + if is_dollar_string(value): + value = unwrap_dollar_string(value) + return to_dollar_string(f"({value})") + return f"({value})" diff --git a/hcl2/serialization.py b/hcl2/serialization.py deleted file mode 100644 index 15d75caa..00000000 --- a/hcl2/serialization.py +++ /dev/null @@ -1,496 +0,0 @@ -from abc import ABC, abstractmethod -from json import JSONEncoder -from typing import List, Any, Union, Tuple, Optional - -from lark import Tree, Token - -ArgsType = List["LarkElement"] - - -def is_dollar_string(value: str) -> bool: - return value.startswith("${") and value.endswith("}") - - -def to_dollar_string(value: str) -> str: - if not is_dollar_string(value): - return f"${{{value}}}" - return value - - -def unwrap_dollar_string(value: str) -> str: - if is_dollar_string(value): - return value[2:-1] - return value - - -def wrap_into_parentheses(value: str) -> str: - if is_dollar_string(value): - value = unwrap_dollar_string(value) - return 
to_dollar_string(f"({value})") - - return f"({value})" - - -class LarkEncoder(JSONEncoder): - def default(self, obj: Any): - if isinstance(obj, LarkRule): - return obj.serialize() - else: - return super().default(obj) - - -class LarkElement(ABC): - @abstractmethod - def tree(self) -> Token: - raise NotImplementedError() - - @abstractmethod - def serialize(self) -> Any: - raise NotImplementedError() - - -class LarkToken(LarkElement): - def __init__(self, name: str, value: Union[str, int]): - self._name = name - self._value = value - - @property - def name(self) -> str: - return self._name - - @property - def value(self): - return self._value - - def serialize(self) -> Any: - return self._value - - def tree(self) -> Token: - return Token(self.name, self.value) - - def __str__(self) -> str: - return str(self._value) - - def __repr__(self) -> str: - return f"" - - -EQ_Token = LarkToken - - -class TokenSequence: - def __init__(self, tokens: List[LarkToken]): - self.tokens = tokens - - def tree(self) -> List[Token]: - return [token.tree() for token in self.tokens] - - def joined(self): - return "".join(str(token) for token in self.tokens) - - -class LarkRule(ABC): - _classes = [] - - @staticmethod - @abstractmethod - def rule_name() -> str: - raise NotImplementedError() - - @abstractmethod - def serialize(self) -> Any: - raise NotImplementedError() - - def tree(self) -> Tree: - result_children = [] - for child in self._children: - if child is None: - continue - - if isinstance(child, TokenSequence): - result_children.extend(child.tree()) - else: - result_children.append(child.tree()) - - return Tree(self.rule_name(), result_children) - - def __init__(self, children): - self._children: List[LarkElement] = children - - def __init_subclass__(cls, **kwargs): - cls._classes.append(cls) - - def __repr__(self): - return f"" - - -class StartRule(LarkRule): - - _children: Tuple["BodyRule"] - - @staticmethod - def rule_name() -> str: - return "start" - - @property - def body(self) 
-> "BodyRule": - return self._children[0] - - def serialize(self) -> Any: - return self.body.serialize() - - -class BodyRule(LarkRule): - - _children: List[ - Union[ - "NewLineOrCommentRule", - "AttributeRule", - "BlockRule", - ] - ] - - @staticmethod - def rule_name() -> str: - return "body" - - def serialize(self) -> Any: - blocks: List[BlockRule] = [] - attributes: List[AttributeRule] = [] - comments = [] - - for child in self._children: - if isinstance(child, BlockRule): - blocks.append(child) - if isinstance(child, AttributeRule): - attributes.append(child) - if isinstance(child, NewLineOrCommentRule): - child_comments = child.actual_comments() - if child_comments: - comments.extend(child_comments) - - result = {} - - for attribute in attributes: - result.update( - {attribute.identifier.serialize(): attribute.expression.serialize()} - ) - - result.update( - {block.labels[0].serialize(): block.serialize() for block in blocks} - ) - - if comments: - result["__comments__"] = comments - - return result - - -class BlockRule(LarkRule): - @staticmethod - def rule_name() -> str: - return "block" - - def __init__(self, children): - super().__init__(children) - *self._labels, self._body = children - - @property - def labels(self) -> List["IdentifierRule"]: - return list(filter(lambda label: label is not None, self._labels)) - - @property - def body(self) -> BodyRule: - return self._body - - def serialize(self) -> BodyRule: - result = self._body.serialize() - labels = self._labels - for label in reversed(labels[1:]): - result = {label.serialize(): result} - return result - - -class IdentifierRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def rule_name() -> str: - return "identifier" - - def __init__(self, children): - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class IntLitRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def 
rule_name() -> str: - return "int_lit" - - def __init__(self, children): - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class FloatLitRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def rule_name() -> str: - return "float_lit" - - def __init__(self, children): - print("float_lit", children) - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class StringLitRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "STRING_LIT" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined()[1:-1] - - -class Expression(LarkRule, ABC): - @staticmethod - def rule_name() -> str: - return "expression" - - -class ExprTermRule(Expression): - @staticmethod - def rule_name() -> str: - return "expr_term" - - def __init__(self, children): - self._parentheses = False - if ( - isinstance(children[0], LarkToken) - and children[0].name == "LPAR" - and isinstance(children[-1], LarkToken) - and children[-1].name == "RPAR" - ): - self._parentheses = True - children = children[1:-1] - super().__init__(children) - - @property - def parentheses(self) -> bool: - return self._parentheses - - def serialize(self) -> Any: - result = self._children[0].serialize() - if self._parentheses: - result = wrap_into_parentheses(result) - result = to_dollar_string(result) - return result - - def tree(self) -> Tree: - tree = super().tree() - if self.parentheses: - return Tree( - tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] - ) - return tree - - -class ConditionalRule(ExprTermRule): - - _children: Tuple[ - Expression, - Expression, - Expression, - ] - - @staticmethod - def rule_name(): - return "conditional" - - @property - def condition(self) -> Expression: - return self._children[0] - - @property - def if_true(self) -> 
Expression: - return self._children[1] - - @property - def if_false(self) -> Expression: - return self._children[2] - - def __init__(self, children): - super().__init__(children) - - def serialize(self) -> Any: - result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" - return to_dollar_string(result) - - -class BinaryOperatorRule(LarkRule): - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "binary_operator" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined() - - -class BinaryTermRule(LarkRule): - _children: Tuple[ - BinaryOperatorRule, - Optional["NewLineOrCommentRule"], - ExprTermRule, - ] - - @staticmethod - def rule_name() -> str: - return "binary_term" - - def __init__(self, children): - if len(children) == 2: - children.insert(1, None) - super().__init__(children) - - @property - def binary_operator(self) -> BinaryOperatorRule: - return self._children[0] - - @property - def comment(self) -> Optional["NewLineOrCommentRule"]: - return self._children[1] - - @property - def has_comment(self) -> bool: - return self.comment is not None - - @property - def expr_term(self) -> ExprTermRule: - return self._children[2] - - def serialize(self) -> Any: - return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" - - -class UnaryOpRule(Expression): - _children: Tuple[LarkToken, ExprTermRule] - - @staticmethod - def rule_name() -> str: - return "unary_op" - - @property - def unary_operator(self) -> str: - return str(self._children[0]) - - @property - def expr_term(self): - return self._children[1] - - def serialize(self) -> Any: - return to_dollar_string(f"{self.unary_operator}{self.expr_term.serialize()}") - - -class BinaryOpRule(Expression): - _children: Tuple[ - ExprTermRule, - BinaryTermRule, - "NewLineOrCommentRule", - ] - - @staticmethod - def rule_name() -> str: - return "binary_op" - - @property - def expr_term(self) -> ExprTermRule: - return 
self._children[0] - - @property - def binary_term(self) -> BinaryTermRule: - return self._children[1] - - def serialize(self) -> Any: - lhs = self.expr_term.serialize() - operator = self.binary_term.binary_operator.serialize() - rhs = self.binary_term.expr_term.serialize() - rhs = unwrap_dollar_string(rhs) - return to_dollar_string(f"{lhs} {operator} {rhs}") - - -class AttributeRule(LarkRule): - _children: Tuple[ - IdentifierRule, - EQ_Token, - Expression, - ] - - @staticmethod - def rule_name() -> str: - return "attribute" - - @property - def identifier(self) -> IdentifierRule: - return self._children[0] - - @property - def expression(self) -> Expression: - return self._children[2] - - def serialize(self) -> Any: - return {self.identifier.serialize(): self.expression.serialize()} - - -class NewLineOrCommentRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "new_line_or_comment" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined() - - def actual_comments(self) -> Optional[List[str]]: - comment = self.serialize() - if comment == "\n": - return None - - comment = comment.strip() - comments = comment.split("\n") - - result = [] - for comment in comments: - if comment.startswith("//"): - comment = comment[2:] - - elif comment.startswith("#"): - comment = comment[1:] - - if comment != "": - result.append(comment.strip()) - - return result From d9c2eca1f99a7edf9b6e16603755c5113dc8a8d7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 2 Apr 2025 16:19:11 +0200 Subject: [PATCH 03/42] batch of different changes --- hcl2/rule_transformer/rules/abstract.py | 32 +++--- hcl2/rule_transformer/rules/base.py | 55 ++++++--- hcl2/rule_transformer/rules/expression.py | 104 +++++++++++------- hcl2/rule_transformer/rules/token_sequence.py | 31 +++--- hcl2/rule_transformer/rules/whitespace.py | 11 +- hcl2/rule_transformer/transformer.py | 12 +- hcl2/rule_transformer/utils.py | 9 ++ 7 files changed, 152 
insertions(+), 102 deletions(-) diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 37f63a03..6c650ea3 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -4,14 +4,16 @@ from lark import Token, Tree from lark.tree import Meta +from hcl2.rule_transformer.utils import SerializationOptions + class LarkElement(ABC): @abstractmethod - def tree(self) -> Token: + def reverse(self) -> Any: raise NotImplementedError() @abstractmethod - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: raise NotImplementedError() @@ -28,10 +30,10 @@ def name(self) -> str: def value(self): return self._value - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: return self._value - def tree(self) -> Token: + def reverse(self) -> Token: return Token(self.name, self.value) def __str__(self) -> str: @@ -47,45 +49,45 @@ def __repr__(self) -> str: RPAR_TOKEN = LarkToken # right parenthesis -class TokenSequence: +class TokenSequence(LarkElement): def __init__(self, tokens: List[LarkToken]): self.tokens = tokens - def tree(self) -> List[Token]: - return [token.tree() for token in self.tokens] + def reverse(self) -> List[Token]: + return [token.reverse() for token in self.tokens] - def joined(self): + def serialize(self, options: SerializationOptions = SerializationOptions()): return "".join(str(token) for token in self.tokens) -class LarkRule(ABC): +class LarkRule(LarkElement, ABC): @staticmethod @abstractmethod def rule_name() -> str: raise NotImplementedError() @abstractmethod - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: raise NotImplementedError() @property def children(self) -> List[LarkElement]: return self._children - def tree(self) -> Tree: + def reverse(self) -> Tree: result_children = [] for child in 
self._children: if child is None: continue if isinstance(child, TokenSequence): - result_children.extend(child.tree()) + result_children.extend(child.reverse()) else: - result_children.append(child.tree()) + result_children.append(child.reverse()) - return Tree(self.rule_name(), result_children) + return Tree(self.rule_name(), result_children, meta=self._meta) - def __init__(self, children, meta: Optional[Meta] = None): + def __init__(self, children: List, meta: Optional[Meta] = None): self._children = children self._meta = meta diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index f46d8039..76d014e9 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -1,3 +1,4 @@ +from collections import defaultdict from typing import Tuple, Any, List, Union, Optional from lark.tree import Meta @@ -7,6 +8,7 @@ from hcl2.rule_transformer.rules.token_sequence import IdentifierRule from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rule_transformer.utils import SerializationOptions class AttributeRule(LarkRule): @@ -28,8 +30,8 @@ def identifier(self) -> IdentifierRule: def expression(self) -> Expression: return self._children[2] - def serialize(self) -> Any: - return {self.identifier.serialize(): self.expression.serialize()} + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return {self.identifier.serialize(options): self.expression.serialize(options)} class BodyRule(LarkRule): @@ -46,18 +48,23 @@ class BodyRule(LarkRule): def rule_name() -> str: return "body" - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: blocks: List[BlockRule] = [] attributes: List[AttributeRule] = [] comments = [] - + inline_comments = [] for child in self._children: + if isinstance(child, BlockRule): blocks.append(child) + if isinstance(child, AttributeRule): attributes.append(child) + # collect 
in-line comments from attribute assignments, expressions etc + inline_comments.extend(child.expression.inline_comments()) + if isinstance(child, NewLineOrCommentRule): - child_comments = child.actual_comments() + child_comments = child.to_list() if child_comments: comments.extend(child_comments) @@ -65,15 +72,27 @@ def serialize(self) -> Any: for attribute in attributes: result.update( - {attribute.identifier.serialize(): attribute.expression.serialize()} + { + attribute.identifier.serialize( + options + ): attribute.expression.serialize(options) + } ) - result.update( - {block.labels[0].serialize(): block.serialize() for block in blocks} - ) + result_blocks = defaultdict(list) + for block in blocks: + name = block.labels[0].serialize(options) + if name in result.keys(): + raise RuntimeError(f"Attribute {name} is already defined.") + result_blocks[name].append(block.serialize(options)) + + result.update(**result_blocks) - if comments: - result["__comments__"] = comments + if options.with_comments: + if comments: + result["__comments__"] = comments + if inline_comments: + result["__inline_comments__"] = inline_comments return result @@ -90,8 +109,8 @@ def rule_name() -> str: def body(self) -> BodyRule: return self._children[0] - def serialize(self) -> Any: - return self.body.serialize() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.body.serialize(options) class BlockRule(LarkRule): @@ -103,7 +122,7 @@ def rule_name() -> str: return "block" def __init__(self, children, meta: Optional[Meta] = None): - super().__init__(children) + super().__init__(children, meta) *self._labels, self._body = children @property @@ -114,9 +133,11 @@ def labels(self) -> List[IdentifierRule]: def body(self) -> BodyRule: return self._body - def serialize(self) -> BodyRule: - result = self._body.serialize() + def serialize( + self, options: SerializationOptions = SerializationOptions() + ) -> BodyRule: + result = 
self._body.serialize(options) labels = self._labels for label in reversed(labels[1:]): - result = {label.serialize(): result} + result = {label.serialize(options): result} return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expression.py index 2a38912a..16daf310 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expression.py @@ -16,6 +16,7 @@ wrap_into_parentheses, to_dollar_string, unwrap_dollar_string, + SerializationOptions, ) @@ -24,10 +25,35 @@ class Expression(LarkRule, ABC): def rule_name() -> str: return "expression" + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children, meta) + + def inline_comments(self): + result = [] + for child in self._children: + + if isinstance(child, NewLineOrCommentRule): + result.extend(child.to_list()) + + elif isinstance(child, Expression): + result.extend(child.inline_comments()) + + return result + + def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + for index in indexes: + try: + child = children[index] + except IndexError: + children.insert(index, None) + else: + if not isinstance(child, NewLineOrCommentRule): + children.insert(index, None) + class ExprTermRule(Expression): - _children: Tuple[ + type_ = Tuple[ Optional[LPAR_TOKEN], Optional[NewLineOrCommentRule], Expression, @@ -35,6 +61,8 @@ class ExprTermRule(Expression): Optional[RPAR_TOKEN], ] + _children: type_ + @staticmethod def rule_name() -> str: return "expr_term" @@ -48,34 +76,36 @@ def __init__(self, children, meta: Optional[Meta] = None): and children[-1].name == "RPAR" ): self._parentheses = True - children = children[1:-1] + else: + children = [None, *children, None] + + self._possibly_insert_null_comments(children, [1, 3]) super().__init__(children, meta) @property def parentheses(self) -> bool: return self._parentheses - def serialize(self) -> Any: - result = self._children[0].serialize() + @property + 
def expression(self) -> Expression: + return self._children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = self.expression.serialize(options) if self.parentheses: result = wrap_into_parentheses(result) result = to_dollar_string(result) return result - def tree(self) -> Tree: - tree = super().tree() - if self.parentheses: - return Tree( - tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] - ) - return tree - -class ConditionalRule(LarkRule): +class ConditionalRule(Expression): _children: Tuple[ Expression, + Optional[NewLineOrCommentRule], Expression, + Optional[NewLineOrCommentRule], + Optional[NewLineOrCommentRule], Expression, ] @@ -83,27 +113,28 @@ class ConditionalRule(LarkRule): def rule_name(): return "conditional" + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 4]) + super().__init__(children, meta) + @property def condition(self) -> Expression: return self._children[0] @property def if_true(self) -> Expression: - return self._children[1] + return self._children[2] @property def if_false(self) -> Expression: - return self._children[2] + return self._children[5] - def __init__(self, children, meta: Optional[Meta] = None): - super().__init__(children, meta) - - def serialize(self) -> Any: - result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = f"{self.condition.serialize(options)} ? 
{self.if_true.serialize(options)} : {self.if_false.serialize(options)}" return to_dollar_string(result) -class BinaryTermRule(LarkRule): +class BinaryTermRule(Expression): _children: Tuple[ BinaryOperatorRule, @@ -116,28 +147,19 @@ def rule_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): - if len(children) == 2: - children.insert(1, None) + self._possibly_insert_null_comments(children, [1]) super().__init__(children, meta) @property def binary_operator(self) -> BinaryOperatorRule: return self._children[0] - @property - def comment(self) -> Optional[NewLineOrCommentRule]: - return self._children[1] - - @property - def has_comment(self) -> bool: - return self.comment is not None - @property def expr_term(self) -> ExprTermRule: return self._children[2] - def serialize(self) -> Any: - return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f"{self.binary_operator.serialize(options)} {self.expr_term.serialize(options)}" class BinaryOpRule(Expression): @@ -159,10 +181,14 @@ def expr_term(self) -> ExprTermRule: def binary_term(self) -> BinaryTermRule: return self._children[1] - def serialize(self) -> Any: - lhs = self.expr_term.serialize() - operator = self.binary_term.binary_operator.serialize() - rhs = self.binary_term.expr_term.serialize() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + lhs = self.expr_term.serialize(options) + operator = self.binary_term.binary_operator.serialize(options) + rhs = self.binary_term.expr_term.serialize(options) + # below line is to avoid dollar string nested inside another dollar string, e.g.: + # hcl2: 15 + (10 * 12) + # desired json: "${15 + (10 * 12)}" + # undesired json: "${15 + ${(10 * 12)}}" rhs = unwrap_dollar_string(rhs) return to_dollar_string(f"{lhs} {operator} {rhs}") @@ -183,5 +209,5 @@ def operator(self) -> str: def 
expr_term(self): return self._children[1] - def serialize(self) -> Any: - return to_dollar_string(f"{self.operator}{self.expr_term.serialize()}") + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return to_dollar_string(f"{self.operator}{self.expr_term.serialize(options)}") diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py index 66e22e2f..174e2510 100644 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -1,9 +1,10 @@ from abc import ABC -from typing import Tuple, Any, List, Optional +from typing import Tuple, Any, List, Optional, Type from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken +from hcl2.rule_transformer.utils import SerializationOptions class TokenSequenceRule(LarkRule, ABC): @@ -12,10 +13,13 @@ class TokenSequenceRule(LarkRule, ABC): def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): children = [TokenSequence(children)] - super().__init__(children) + super().__init__(children, meta) - def serialize(self) -> Any: - return self._children[0].joined() + def serialized_type(self) -> Type: + return str + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.serialized_type()(self._children[0].serialize(options)) class IdentifierRule(TokenSequenceRule): @@ -23,17 +27,14 @@ class IdentifierRule(TokenSequenceRule): def rule_name() -> str: return "identifier" - def serialize(self) -> str: - return str(super().serialize()) - class IntLitRule(TokenSequenceRule): @staticmethod def rule_name() -> str: return "int_lit" - def serialize(self) -> float: - return int(super().serialize()) + def serialized_type(self) -> Type: + return int class FloatLitRule(TokenSequenceRule): @@ -41,23 +42,19 @@ class FloatLitRule(TokenSequenceRule): def rule_name() -> str: return "float_lit" - def serialize(self) 
-> float: - return float(super().serialize()) + def serialized_type(self) -> Type: + return float class StringLitRule(TokenSequenceRule): @staticmethod def rule_name() -> str: + # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; + # nevertheless, try to change it to a rule in hcl2.lark return "STRING_LIT" - def serialize(self) -> str: - return str(super().serialize()) - class BinaryOperatorRule(TokenSequenceRule): @staticmethod def rule_name() -> str: return "binary_operator" - - def serialize(self) -> str: - return str(super().serialize()) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index f56a386e..b37cedc4 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -1,6 +1,7 @@ from typing import Optional, List, Any from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule +from hcl2.rule_transformer.utils import SerializationOptions class NewLineOrCommentRule(LarkRule): @@ -11,11 +12,13 @@ class NewLineOrCommentRule(LarkRule): def rule_name() -> str: return "new_line_or_comment" - def serialize(self) -> Any: - return TokenSequence(self._children).joined() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return TokenSequence(self._children).serialize(options) - def actual_comments(self) -> Optional[List[str]]: - comment = self.serialize() + def to_list( + self, options: SerializationOptions = SerializationOptions() + ) -> Optional[List[str]]: + comment = self.serialize(options) if comment == "\n": return None diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 9e6af6ef..1c7d6157 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -40,13 +40,7 @@ class RuleTransformer(Transformer): def is_type_keyword(value: str) -> bool: return value in {"bool", "number", "string"} - def 
__init__(self, with_meta: bool = False, with_comments: bool = True): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self._with_meta = with_meta - self._with_comments = with_comments + def __init__(self): super().__init__() def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: @@ -113,6 +107,4 @@ def attribute(self, meta: Meta, args) -> AttributeRule: @v_args(meta=True) def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: - if self._with_comments: - return NewLineOrCommentRule(args, meta) - return Discard + return NewLineOrCommentRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 060d3b53..e083d628 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,3 +1,12 @@ +from dataclasses import dataclass + + +@dataclass +class SerializationOptions: + with_comments: bool = True + with_meta: bool = False + + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): return False From 448ffd42050489eb92bbc5855a0905b04436c51f Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Fri, 4 Apr 2025 10:29:47 +0200 Subject: [PATCH 04/42] comments --- hcl2/rule_transformer/rules/whitespace.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index b37cedc4..96fe7c91 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -22,16 +22,19 @@ def to_list( if comment == "\n": return None - comment = comment.strip() comments = comment.split("\n") result = [] for comment in comments: - if comment.startswith("//"): - comment = comment[2:] + comment = comment.strip() - elif comment.startswith("#"): - comment = comment[1:] + for delimiter in ("//", "/*", "#"): + + if comment.startswith(delimiter): + comment = 
comment[len(delimiter) :] + + if comment.endswith("*/"): + comment = comment[:-2] if comment != "": result.append(comment.strip()) From 65f88bc3e7466b09108f4c0504c485d27e164558 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 2 Jul 2025 17:03:05 +0200 Subject: [PATCH 05/42] various changes --- hcl2/parser.py | 4 +- hcl2/rule_transformer/editor.py | 77 ++++++ hcl2/rule_transformer/hcl2.lark | 166 +++++++++++ hcl2/rule_transformer/processor.py | 258 ++++++++++++++++++ hcl2/rule_transformer/rules/abstract.py | 93 ++++--- hcl2/rule_transformer/rules/base.py | 53 ++-- hcl2/rule_transformer/rules/containers.py | 85 ++++++ hcl2/rule_transformer/rules/expression.py | 102 +++---- hcl2/rule_transformer/rules/indexing.py | 75 +++++ hcl2/rule_transformer/rules/literal_rules.py | 47 ++++ hcl2/rule_transformer/rules/strings.py | 50 ++++ hcl2/rule_transformer/rules/token_sequence.py | 116 ++++---- hcl2/rule_transformer/rules/tokens.py | 66 +++++ hcl2/rule_transformer/rules/tree.py | 106 +++++++ hcl2/rule_transformer/rules/whitespace.py | 46 +++- hcl2/rule_transformer/transformer.py | 103 +++++-- hcl2/rule_transformer/utils.py | 8 +- 17 files changed, 1232 insertions(+), 223 deletions(-) create mode 100644 hcl2/rule_transformer/editor.py create mode 100644 hcl2/rule_transformer/hcl2.lark create mode 100644 hcl2/rule_transformer/processor.py create mode 100644 hcl2/rule_transformer/rules/containers.py create mode 100644 hcl2/rule_transformer/rules/indexing.py create mode 100644 hcl2/rule_transformer/rules/literal_rules.py create mode 100644 hcl2/rule_transformer/rules/strings.py create mode 100644 hcl2/rule_transformer/rules/tokens.py create mode 100644 hcl2/rule_transformer/rules/tree.py diff --git a/hcl2/parser.py b/hcl2/parser.py index 79d50122..a0c87e34 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,7 +12,7 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "hcl2.lark", + 
"rule_transformer/hcl2.lark.lark", parser="lalr", cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, @@ -29,7 +29,7 @@ def reconstruction_parser() -> Lark: if necessary. """ return Lark.open( - "hcl2.lark", + "rule_transformer/hcl2.lark", parser="lalr", # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed: # diff --git a/hcl2/rule_transformer/editor.py b/hcl2/rule_transformer/editor.py new file mode 100644 index 00000000..9efce08f --- /dev/null +++ b/hcl2/rule_transformer/editor.py @@ -0,0 +1,77 @@ +import dataclasses +from copy import copy, deepcopy +from typing import List, Optional, Set, Tuple + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.base import BlockRule, StartRule + + +@dataclasses.dataclass +class TreePathElement: + + name: str + index: int = 0 + + +@dataclasses.dataclass +class TreePath: + + elements: List[TreePathElement] = dataclasses.field(default_factory=list) + + @classmethod + def build(cls, elements: List[Tuple[str, Optional[int]] | str]): + results = [] + for element in elements: + if isinstance(element, tuple): + if len(element) == 1: + result = TreePathElement(element[0], 0) + else: + result = TreePathElement(*element) + else: + result = TreePathElement(element, 0) + + results.append(result) + + return cls(results) + + def __iter__(self): + return self.elements.__iter__() + + def __len__(self): + return self.elements.__len__() + + +class Editor: + def __init__(self, rules_tree: LarkRule): + self.rules_tree = rules_tree + + @classmethod + def _find_one(cls, rules_tree: LarkRule, path_element: TreePathElement) -> LarkRule: + return cls._find_all(rules_tree, path_element.name)[path_element.index] + + @classmethod + def _find_all(cls, rules_tree: LarkRule, rule_name: str) -> List[LarkRule]: + children = [] + print("rule", rules_tree) + print("rule children", rules_tree.children) + for child in rules_tree.children: + 
if isinstance(child, LarkRule) and child.lark_name() == rule_name: + children.append(child) + + return children + + def find_by_path(self, path: TreePath, rule_name: str) -> List[LarkRule]: + path = deepcopy(path.elements) + + current_rule = self.rules_tree + while len(path) > 0: + current_path, *path = path + print(current_path, path) + current_rule = self._find_one(current_rule, current_path) + + return self._find_all(current_rule, rule_name) + + # def visit(self, path: TreePath) -> "Editor": + # + # while len(path) > 1: + # current = diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark new file mode 100644 index 00000000..a7722118 --- /dev/null +++ b/hcl2/rule_transformer/hcl2.lark @@ -0,0 +1,166 @@ +// ============================================================================ +// Terminals +// ============================================================================ + +// Whitespace and Comments +NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ + +// Keywords +IF : "if" +IN : "in" +FOR : "for" +FOR_EACH : "for_each" + +// Identifiers and Names +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +IDENTIFIER: NAME | IN | FOR | IF | FOR_EACH + +// Literals +ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ +STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ +DECIMAL : "0".."9" +NEGATIVE_DECIMAL : "-" DECIMAL +EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ +INT_LITERAL: NEGATIVE_DECIMAL? DECIMAL+ +FLOAT_LITERAL: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? + | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) + +// Operators +BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS +DOUBLE_EQ : "==" +NEQ : "!=" +LT : "<" +GT : ">" +LEQ : "<=" +GEQ : ">=" +MINUS : "-" +ASTERISK : "*" +SLASH : "/" +PERCENT : "%" +DOUBLE_AMP : "&&" +DOUBLE_PIPE : "||" +PLUS : "+" +NOT : "!" +QMARK : "?" 
+ +// Punctuation +LPAR : "(" +RPAR : ")" +LBRACE : "{" +RBRACE : "}" +LSQB : "[" +RSQB : "]" +COMMA : "," +DOT : "." +EQ : /[ \t]*=(?!=|>)/ +COLON : ":" +DBLQUOTE : "\"" + +// Interpolation +INTERP_START : "${" + +// Splat Operators +ATTR_SPLAT : ".*" +FULL_SPLAT_START : "[*]" + +// Special Operators +FOR_OBJECT_ARROW : "=>" +ELLIPSIS : "..." +COLONS: "::" + +// Heredocs +HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ + +// Ignore whitespace (but not newlines, as they're significant in HCL) +%ignore /[ \t]+/ + +// ============================================================================ +// Rules +// ============================================================================ + +// Top-level structure +start : body + +// Body and basic constructs +body : (new_line_or_comment? (attribute | block))* new_line_or_comment? +attribute : identifier EQ expression +block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE + +// Whitespace and comments +new_line_or_comment: ( NL_OR_COMMENT )+ + +// Basic literals and identifiers +identifier: IDENTIFIER +int_lit: INT_LITERAL +float_lit: FLOAT_LITERAL +string: DBLQUOTE string_part* DBLQUOTE +string_part: STRING_CHARS + | ESCAPED_INTERPOLATION + | interpolation + +// Expressions +?expression : expr_term | operation | conditional +interpolation: INTERP_START expression RBRACE +conditional : expression QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression + +// Operations +?operation : unary_op | binary_op +!unary_op : (MINUS | NOT) expr_term +binary_op : expression binary_term new_line_or_comment? +binary_term : binary_operator new_line_or_comment? expression +!binary_operator : BINARY_OP + +// Expression terms +expr_term : LPAR new_line_or_comment? expression new_line_or_comment? 
RPAR + | float_lit + | int_lit + | string + | tuple + | object + | function_call + | index_expr_term + | get_attr_expr_term + | identifier + | provider_function_call + | heredoc_template + | heredoc_template_trim + | attr_splat_expr_term + | full_splat_expr_term + | for_tuple_expr + | for_object_expr + +// Collections +tuple : LSQB (new_line_or_comment* expression new_line_or_comment* COMMA)* (new_line_or_comment* expression)? new_line_or_comment* RSQB +object : LBRACE new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* RBRACE +object_elem : object_elem_key ( EQ | COLON ) expression +object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression +object_elem_key_expression : LPAR expression RPAR +object_elem_key_dot_accessor : identifier (DOT identifier)+ + +// Heredocs +heredoc_template : HEREDOC_TEMPLATE +heredoc_template_trim : HEREDOC_TEMPLATE_TRIM + +// Functions +function_call : identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment* COMMA new_line_or_comment* expression)* (COMMA | ELLIPSIS)? new_line_or_comment*) +provider_function_call: identifier COLONS identifier COLONS identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR + +// Indexing and attribute access +index_expr_term : expr_term index +get_attr_expr_term : expr_term get_attr +attr_splat_expr_term : expr_term attr_splat +full_splat_expr_term : expr_term full_splat +?index : braces_index | short_index +braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB +short_index : DOT INT_LITERAL +get_attr : DOT identifier +attr_splat : ATTR_SPLAT get_attr* +full_splat : FULL_SPLAT_START (get_attr | index)* + +// For expressions +!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? 
RSQB +!for_object_expr : LBRACE new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? ELLIPSIS? new_line_or_comment? for_cond? new_line_or_comment? RBRACE +!for_intro : FOR new_line_or_comment? identifier (COMMA identifier new_line_or_comment?)? new_line_or_comment? IN new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? +!for_cond : IF new_line_or_comment? expression diff --git a/hcl2/rule_transformer/processor.py b/hcl2/rule_transformer/processor.py new file mode 100644 index 00000000..b854aff5 --- /dev/null +++ b/hcl2/rule_transformer/processor.py @@ -0,0 +1,258 @@ +from copy import copy, deepcopy +from typing import ( + List, + Optional, + Union, + Callable, + Any, + Tuple, + Generic, + TypeVar, + cast, + Generator, +) + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement +from hcl2.rule_transformer.rules.base import BlockRule, AttributeRule +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + +T = TypeVar("T", bound=LarkRule) + + +class RulesProcessor(Generic[T]): + """""" + + @classmethod + def _traverse( + cls, + node: T, + predicate: Callable[[T], bool], + current_depth: int = 0, + max_depth: Optional[int] = None, + ) -> List["RulesProcessor"]: + + results = [] + + if predicate(node): + results.append(cls(node)) + + if max_depth is not None and current_depth >= max_depth: + return results + + for child in node.children: + if child is None or not isinstance(child, LarkRule): + continue + + child_results = cls._traverse( + child, + predicate, + current_depth + 1, + max_depth, + ) + results.extend(child_results) + + return results + + def __init__(self, node: LarkRule): + self.node = node + + @property + def siblings(self): + if self.node.parent is None: + return None + return self.node.parent.children + + @property + def next_siblings(self): + if self.node.parent is None: + return None + return 
self.node.parent.children[self.node.index + 1 :] + + @property + def previous_siblings(self): + if self.node.parent is None: + return None + return self.node.parent.children[: self.node.index - 1] + + def walk(self) -> Generator[Tuple["RulesProcessor", List["RulesProcessor"]]]: + child_processors = [self.__class__(child) for child in self.node.children] + yield self, child_processors + for processor in child_processors: + if isinstance(processor.node, LarkRule): + for result in processor.walk(): + yield result + + def find_block( + self, + labels: List[str], + exact_match: bool = True, + max_depth: Optional[int] = None, + ) -> "RulesProcessor[BlockRule]": + return self.find_blocks(labels, exact_match, max_depth)[0] + + def find_blocks( + self, + labels: List[str], + exact_match: bool = True, + max_depth: Optional[int] = None, + ) -> List["RulesProcessor[BlockRule]"]: + """ + Find blocks by their labels. + + Args: + labels: List of label strings to match + exact_match: If True, all labels must match exactly. If False, labels can be a subset. + max_depth: Maximum depth to search + + Returns: + ... + """ + + def block_predicate(node: LarkRule) -> bool: + if not isinstance(node, BlockRule): + return False + + node_labels = [label.serialize() for label in node.labels] + + if exact_match: + return node_labels == labels + else: + # Check if labels is a prefix of node_labels + if len(labels) > len(node_labels): + return False + return node_labels[: len(labels)] == labels + + return cast( + List[RulesProcessor[BlockRule]], + self._traverse(self.node, block_predicate, max_depth=max_depth), + ) + + def attribute( + self, name: str, max_depth: Optional[int] = None + ) -> "RulesProcessor[AttributeRule]": + return self.find_attributes(name, max_depth)[0] + + def find_attributes( + self, name: str, max_depth: Optional[int] = None + ) -> List["RulesProcessor[AttributeRule]"]: + """ + Find attributes by their identifier name. 
+ + Args: + name: Attribute name to search for + max_depth: Maximum depth to search + + Returns: + List of TreePath objects for matching attributes + """ + + def attribute_predicate(node: LarkRule) -> bool: + if not isinstance(node, AttributeRule): + return False + return node.identifier.serialize() == name + + return self._traverse(self.node, attribute_predicate, max_depth=max_depth) + + def rule(self, rule_name: str, max_depth: Optional[int] = None): + return self.find_rules(rule_name, max_depth)[0] + + def find_rules( + self, rule_name: str, max_depth: Optional[int] = None + ) -> List["RulesProcessor"]: + """ + Find all rules of a specific type. + + Args: + rule_name: Name of the rule type to find + max_depth: Maximum depth to search + + Returns: + List of TreePath objects for matching rules + """ + + def rule_predicate(node: LarkRule) -> bool: + return node.lark_name() == rule_name + + return self._traverse(self.node, rule_predicate, max_depth=max_depth) + + def find_by_predicate( + self, predicate: Callable[[LarkRule], bool], max_depth: Optional[int] = None + ) -> List["RulesProcessor"]: + """ + Find all rules matching a custom predicate. 
+ + Args: + predicate: Function that returns True for nodes to collect + max_depth: Maximum depth to search + + Returns: + List of TreePath objects for matching rules + """ + return self._traverse(self.node, predicate, max_depth) + + # Convenience methods + def get_all_blocks(self, max_depth: Optional[int] = None) -> List: + """Get all blocks in the tree.""" + return self.find_rules("block", max_depth) + + def get_all_attributes( + self, max_depth: Optional[int] = None + ) -> List["RulesProcessor"]: + """Get all attributes in the tree.""" + return self.find_rules("attribute", max_depth) + + def previous(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: + """Get the next sibling node.""" + if self.node.parent is None: + return None + + for sibling in reversed(self.previous_siblings): + if sibling is not None and isinstance(sibling, LarkRule): + if skip_new_line and isinstance(sibling, NewLineOrCommentRule): + continue + return self.__class__(sibling) + + def next(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: + """Get the next sibling node.""" + if self.node.parent is None: + return None + + for sibling in self.next_siblings: + if sibling is not None and isinstance(sibling, LarkRule): + if skip_new_line and isinstance(sibling, NewLineOrCommentRule): + continue + return self.__class__(sibling) + + def append_child( + self, new_node: LarkRule, indentation: bool = True + ) -> "RulesProcessor": + children = self.node.children + if indentation: + if isinstance(children[-1], NewLineOrCommentRule): + children.pop() + children.append(NewLineOrCommentRule.from_string("\n ")) + + new_node = deepcopy(new_node) + new_node.set_parent(self.node) + new_node.set_index(len(children)) + children.append(new_node) + return self.__class__(new_node) + + def replace(self, new_node: LarkRule) -> "RulesProcessor": + new_node = deepcopy(new_node) + + self.node.parent.children.pop(self.node.index) + self.node.parent.children.insert(self.node.index, new_node) + 
new_node.set_parent(self.node.parent) + new_node.set_index(self.node.index) + return self.__class__(new_node) + + # def insert_before(self, new_node: LarkRule) -> bool: + # """Insert a new node before this one.""" + # if self.parent is None or self.parent_index < 0: + # return False + # + # try: + # self.parent.children.insert(self.parent_index, new_node) + # except (IndexError, AttributeError): + # return False diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 6c650ea3..d3a3b634 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Union, List, Optional +from typing import Any, Union, List, Optional, Tuple, Callable from lark import Token, Tree from lark.tree import Meta @@ -8,8 +8,23 @@ class LarkElement(ABC): + @property + @abstractmethod + def lark_name(self) -> str: + raise NotImplementedError() + + def __init__(self, index: int = -1, parent: "LarkElement" = None): + self._index = index + self._parent = parent + + def set_index(self, i: int): + self._index = i + + def set_parent(self, node: "LarkElement"): + self._parent = node + @abstractmethod - def reverse(self) -> Any: + def to_lark(self) -> Any: raise NotImplementedError() @abstractmethod @@ -17,53 +32,42 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A raise NotImplementedError() -class LarkToken(LarkElement): - def __init__(self, name: str, value: Union[str, int]): - self._name = name +class LarkToken(LarkElement, ABC): + def __init__(self, value: Union[str, int]): self._value = value + super().__init__() @property - def name(self) -> str: - return self._name + @abstractmethod + def lark_name(self) -> str: + raise NotImplementedError() + + @property + @abstractmethod + def serialize_conversion(self) -> Callable: + raise NotImplementedError() @property def value(self): return self._value - def 
serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self._value + def serialize(self, options: SerializationOptions = SerializationOptions()): + return self.serialize_conversion(self.value) - def reverse(self) -> Token: - return Token(self.name, self.value) + def to_lark(self) -> Token: + return Token(self.lark_name, self.value) def __str__(self) -> str: return str(self._value) def __repr__(self) -> str: - return f"" - - -EQ_Token = LarkToken -COLON_TOKEN = LarkToken -LPAR_TOKEN = LarkToken # left parenthesis -RPAR_TOKEN = LarkToken # right parenthesis - - -class TokenSequence(LarkElement): - def __init__(self, tokens: List[LarkToken]): - self.tokens = tokens - - def reverse(self) -> List[Token]: - return [token.reverse() for token in self.tokens] - - def serialize(self, options: SerializationOptions = SerializationOptions()): - return "".join(str(token) for token in self.tokens) + return f"" class LarkRule(LarkElement, ABC): - @staticmethod + @property @abstractmethod - def rule_name() -> str: + def lark_name(self) -> str: raise NotImplementedError() @abstractmethod @@ -74,22 +78,33 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A def children(self) -> List[LarkElement]: return self._children - def reverse(self) -> Tree: + @property + def parent(self): + return self._parent + + @property + def index(self): + return self._index + + def to_lark(self) -> Tree: result_children = [] for child in self._children: if child is None: continue - if isinstance(child, TokenSequence): - result_children.extend(child.reverse()) - else: - result_children.append(child.reverse()) + result_children.append(child.to_lark()) - return Tree(self.rule_name(), result_children, meta=self._meta) + return Tree(self.lark_name, result_children, meta=self._meta) - def __init__(self, children: List, meta: Optional[Meta] = None): + def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): + 
super().__init__() self._children = children self._meta = meta + for index, child in enumerate(children): + if child is not None: + child.set_index(index) + child.set_parent(self) + def __repr__(self): - return f"" + return f"" diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 76d014e9..6d0c4924 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,9 +3,9 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import LarkRule, EQ_Token +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken from hcl2.rule_transformer.rules.expression import Expression -from hcl2.rule_transformer.rules.token_sequence import IdentifierRule +from hcl2.rule_transformer.rules.tokens import IdentifierToken, EQ_TOKEN from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.utils import SerializationOptions @@ -13,17 +13,17 @@ class AttributeRule(LarkRule): _children: Tuple[ - IdentifierRule, - EQ_Token, + IdentifierToken, + EQ_TOKEN, Expression, ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "attribute" @property - def identifier(self) -> IdentifierRule: + def identifier(self) -> IdentifierToken: return self._children[0] @property @@ -39,13 +39,13 @@ class BodyRule(LarkRule): _children: List[ Union[ NewLineOrCommentRule, - AttributeRule, + # AttributeRule, "BlockRule", ] ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "body" def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: @@ -71,13 +71,7 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A result = {} for attribute in attributes: - result.update( - { - attribute.identifier.serialize( - options - ): attribute.expression.serialize(options) - } - ) + result.update(attribute.serialize(options)) result_blocks = defaultdict(list) for 
block in blocks: @@ -101,14 +95,14 @@ class StartRule(LarkRule): _children: Tuple[BodyRule] - @staticmethod - def rule_name() -> str: - return "start" - @property def body(self) -> BodyRule: return self._children[0] + @property + def lark_name(self) -> str: + return "start" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: return self.body.serialize(options) @@ -117,16 +111,19 @@ class BlockRule(LarkRule): _children: Tuple[BodyRule] - @staticmethod - def rule_name() -> str: - return "block" - def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) - *self._labels, self._body = children + + *self._labels, self._body = [ + child for child in children if not isinstance(child, LarkToken) + ] + + @property + def lark_name(self) -> str: + return "block" @property - def labels(self) -> List[IdentifierRule]: + def labels(self) -> List[IdentifierToken]: return list(filter(lambda label: label is not None, self._labels)) @property @@ -138,6 +135,6 @@ def serialize( ) -> BodyRule: result = self._body.serialize(options) labels = self._labels - for label in reversed(labels[1:]): + for label in reversed(labels): result = {label.serialize(options): result} return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py new file mode 100644 index 00000000..c39f3ba2 --- /dev/null +++ b/hcl2/rule_transformer/rules/containers.py @@ -0,0 +1,85 @@ +from typing import Tuple, List, Optional, Union, Any + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.literal_rules import ( + FloatLitRule, + IntLitRule, + IdentifierRule, +) +from hcl2.rule_transformer.rules.strings import StringRule +from hcl2.rule_transformer.rules.tokens import ( + COLON_TOKEN, + EQ_TOKEN, + LBRACE_TOKEN, + COMMA_TOKEN, + RBRACE_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import ( + 
NewLineOrCommentRule, + InlineCommentMixIn, +) +from hcl2.rule_transformer.utils import SerializationOptions + + +class ObjectElemKeyRule(LarkRule): + _children: Tuple[Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule]] + + @staticmethod + def lark_name() -> str: + return "object_elem_key" + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.children[0].serialize(options) + + +class ObjectElemRule(LarkRule): + + _children: Tuple[ + ObjectElemKeyRule, + Union[EQ_TOKEN, COLON_TOKEN], + Expression, + ] + + @staticmethod + def lark_name() -> str: + return "object_elem" + + @property + def key(self) -> ObjectElemKeyRule: + return self.children[0] + + @property + def expression(self): + return self.children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return { + self.children[0].serialize(options): self.children[2].serialize(options) + } + + +class ObjectRule(InlineCommentMixIn): + + _children: Tuple[ + LBRACE_TOKEN, + Optional[NewLineOrCommentRule], + Tuple[Union[ObjectElemRule, Optional[COMMA_TOKEN], NewLineOrCommentRule], ...], + RBRACE_TOKEN, + ] + + @staticmethod + def lark_name() -> str: + return "object" + + @property + def elements(self) -> List[ObjectElemRule]: + return [ + child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) + ] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = {} + for element in self.elements: + result.update(element.serialize()) + return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expression.py index 16daf310..8a03f813 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expression.py @@ -1,17 +1,18 @@ from abc import ABC -from typing import Any, Tuple, Optional, List +from copy import deepcopy +from typing import Any, Tuple, Optional -from lark import Tree, Token from lark.tree 
import Meta from hcl2.rule_transformer.rules.abstract import ( - LarkRule, LarkToken, - LPAR_TOKEN, - RPAR_TOKEN, ) -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.rules.token_sequence import BinaryOperatorRule +from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule +from hcl2.rule_transformer.rules.tokens import LPAR_TOKEN, RPAR_TOKEN, QMARK_TOKEN, COLON_TOKEN +from hcl2.rule_transformer.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) from hcl2.rule_transformer.utils import ( wrap_into_parentheses, to_dollar_string, @@ -20,36 +21,14 @@ ) -class Expression(LarkRule, ABC): - @staticmethod - def rule_name() -> str: +class Expression(InlineCommentMixIn, ABC): + @property + def lark_name(self) -> str: return "expression" def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) - def inline_comments(self): - result = [] - for child in self._children: - - if isinstance(child, NewLineOrCommentRule): - result.extend(child.to_list()) - - elif isinstance(child, Expression): - result.extend(child.inline_comments()) - - return result - - def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): - for index in indexes: - try: - child = children[index] - except IndexError: - children.insert(index, None) - else: - if not isinstance(child, NewLineOrCommentRule): - children.insert(index, None) - class ExprTermRule(Expression): @@ -63,17 +42,17 @@ class ExprTermRule(Expression): _children: type_ - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = False if ( isinstance(children[0], LarkToken) - and children[0].name == "LPAR" + and children[0].lark_name == "LPAR" and isinstance(children[-1], LarkToken) - and children[-1].name == "RPAR" + and children[-1].lark_name == "RPAR" ): self._parentheses = 
True else: @@ -90,11 +69,14 @@ def parentheses(self) -> bool: def expression(self) -> Expression: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize(self , unwrap: bool = False, options: SerializationOptions = SerializationOptions()) -> Any: result = self.expression.serialize(options) if self.parentheses: result = wrap_into_parentheses(result) result = to_dollar_string(result) + + if options.unwrap_dollar_string: + result = unwrap_dollar_string(result) return result @@ -102,19 +84,21 @@ class ConditionalRule(Expression): _children: Tuple[ Expression, + QMARK_TOKEN, Optional[NewLineOrCommentRule], Expression, Optional[NewLineOrCommentRule], + COLON_TOKEN, Optional[NewLineOrCommentRule], Expression, ] - @staticmethod - def rule_name(): + @property + def lark_name(self) -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 4]) + self._possibly_insert_null_comments(children, [2, 4, 6]) super().__init__(children, meta) @property @@ -123,13 +107,15 @@ def condition(self) -> Expression: @property def if_true(self) -> Expression: - return self._children[2] + return self._children[3] @property def if_false(self) -> Expression: - return self._children[5] + return self._children[7] def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + options = options.replace(unwrap_dollar_string=True) + print(self.condition) result = f"{self.condition.serialize(options)} ? 
{self.if_true.serialize(options)} : {self.if_false.serialize(options)}" return to_dollar_string(result) @@ -142,8 +128,8 @@ class BinaryTermRule(Expression): ExprTermRule, ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -166,11 +152,11 @@ class BinaryOpRule(Expression): _children: Tuple[ ExprTermRule, BinaryTermRule, - NewLineOrCommentRule, + Optional[NewLineOrCommentRule], ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "binary_op" @property @@ -182,23 +168,23 @@ def binary_term(self) -> BinaryTermRule: return self._children[1] def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - lhs = self.expr_term.serialize(options) - operator = self.binary_term.binary_operator.serialize(options) - rhs = self.binary_term.expr_term.serialize(options) - # below line is to avoid dollar string nested inside another dollar string, e.g.: - # hcl2: 15 + (10 * 12) - # desired json: "${15 + (10 * 12)}" - # undesired json: "${15 + ${(10 * 12)}}" - rhs = unwrap_dollar_string(rhs) - return to_dollar_string(f"{lhs} {operator} {rhs}") + children_options = options.replace(unwrap_dollar_string=True) + lhs = self.expr_term.serialize(children_options) + operator = self.binary_term.binary_operator.serialize(children_options) + rhs = self.binary_term.expr_term.serialize(children_options) + + result = f"{lhs} {operator} {rhs}" + if options.unwrap_dollar_string: + return result + return to_dollar_string(result) class UnaryOpRule(Expression): _children: Tuple[LarkToken, ExprTermRule] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "unary_op" @property diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py new file mode 100644 index 00000000..ce23d040 --- /dev/null +++ b/hcl2/rule_transformer/rules/indexing.py @@ -0,0 +1,75 
@@ +from typing import List, Optional, Tuple, Any + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import ExprTermRule, Expression +from hcl2.rule_transformer.rules.tokens import ( + DOT_TOKEN, + IntToken, + LSQB_TOKEN, + RSQB_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import ( + InlineCommentMixIn, + NewLineOrCommentRule, +) +from hcl2.rule_transformer.utils import SerializationOptions, to_dollar_string + + +class ShortIndexRule(LarkRule): + + _children: Tuple[ + DOT_TOKEN, + IntToken, + ] + + @property + def lark_name(self) -> str: + return "short_index" + + @property + def index(self): + return self.children[1] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f".{self.index.serialize(options)}" + + +class SqbIndex(InlineCommentMixIn): + _children: Tuple[ + LSQB_TOKEN, + Optional[NewLineOrCommentRule], + ExprTermRule, + Optional[NewLineOrCommentRule], + RSQB_TOKEN, + ] + + @property + def lark_name(self) -> str: + return "braces_index" + + @property + def index_expression(self): + return self.children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f"[{self.index_expression.serialize(options)}]" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3]) + super().__init__(children, meta) + + +class IndexExprTermRule(Expression): + + _children: Tuple[ExprTermRule, SqbIndex] + + @property + def lark_name(self) -> str: + return "index_expr_term" + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return to_dollar_string( + f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + ) diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py new file mode 100644 index 00000000..06ca99ae --- /dev/null +++ 
b/hcl2/rule_transformer/rules/literal_rules.py @@ -0,0 +1,47 @@ +from abc import ABC +from typing import Any, Tuple + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken +from hcl2.rule_transformer.utils import SerializationOptions + + +class TokenRule(LarkRule, ABC): + + _children: Tuple[LarkToken] + + @property + def token(self) -> LarkToken: + return self._children[0] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.token.serialize() + + +class IdentifierRule(TokenRule): + @property + def lark_name(self) -> str: + return "identifier" + + +class IntLitRule(TokenRule): + @property + def lark_name(self) -> str: + return "int_lit" + + +class FloatLitRule(TokenRule): + @property + def lark_name(self) -> str: + return "float_lit" + + +class StringPartRule(TokenRule): + @property + def lark_name(self) -> str: + return "string" + + +class BinaryOperatorRule(TokenRule): + @property + def lark_name(self) -> str: + return "binary_operator" diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py new file mode 100644 index 00000000..0f53c55a --- /dev/null +++ b/hcl2/rule_transformer/rules/strings.py @@ -0,0 +1,50 @@ +from typing import Tuple, Optional, List, Any, Union + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import Expression, ExprTermRule +from hcl2.rule_transformer.rules.literal_rules import StringPartRule +from hcl2.rule_transformer.rules.tokens import ( + INTERP_START_TOKEN, + RBRACE_TOKEN, + DBLQUOTE_TOKEN, + STRING_CHARS_TOKEN, +) +from hcl2.rule_transformer.utils import SerializationOptions + + +class StringRule(LarkRule): + + _children: Tuple[DBLQUOTE_TOKEN, List[StringPartRule], DBLQUOTE_TOKEN] + + @property + def lark_name(self) -> str: + return "string" + + @property + def string_parts(self): + return self.children[1:-1] + + def serialize(self, options: 
SerializationOptions = SerializationOptions()) -> Any: + return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + + +class InterpolationRule(LarkRule): + + _children: Tuple[ + INTERP_START_TOKEN, + Expression, + RBRACE_TOKEN, + ] + + @property + def lark_name(self) -> str: + return "interpolation" + + @property + def expression(self): + return self.children[1] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return "${" + self.expression.serialize(options) + "}" diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py index 174e2510..66d780b3 100644 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -1,60 +1,56 @@ -from abc import ABC -from typing import Tuple, Any, List, Optional, Type - -from lark.tree import Meta - -from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions - - -class TokenSequenceRule(LarkRule, ABC): - - _children: Tuple[TokenSequence] - - def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): - children = [TokenSequence(children)] - super().__init__(children, meta) - - def serialized_type(self) -> Type: - return str - - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self.serialized_type()(self._children[0].serialize(options)) - - -class IdentifierRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "identifier" - - -class IntLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "int_lit" - - def serialized_type(self) -> Type: - return int - - -class FloatLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "float_lit" - - def serialized_type(self) -> Type: - return float - - -class StringLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: 
- # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; - # nevertheless, try to change it to a rule in hcl2.lark - return "STRING_LIT" - - -class BinaryOperatorRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "binary_operator" +# from abc import ABC +# from typing import Tuple, Any, List, Optional, Type +# +# from lark.tree import Meta +# +# from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken +# from hcl2.rule_transformer.utils import SerializationOptions +# +# +# class TokenSequenceRule(LarkRule, ABC): +# +# _children: Tuple[TokenSequence] +# +# def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): +# children = [TokenSequence(children)] +# super().__init__(children, meta) +# +# def serialized_type(self) -> Type: +# return str +# +# def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: +# return self.serialized_type()(self._children[0].serialize(options)) +# +# +# class IdentifierRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "identifier" +# +# +# class IntLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "int_lit" +# +# def serialized_type(self) -> Type: +# return int +# +# +# class FloatLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "float_lit" +# +# def serialized_type(self) -> Type: +# return float +# +# +# class StringLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; +# # nevertheless, try to change it to a rule in the grammar +# return "STRING_LIT" +# +# diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py new file mode 100644 index 00000000..18e1ac07 --- /dev/null +++ b/hcl2/rule_transformer/rules/tokens.py @@ -0,0 +1,66 @@ +from typing import Callable, Any + +from 
hcl2.rule_transformer.rules.abstract import LarkToken + + +class StringToken(LarkToken): + def __init__(self, name: str, value: Any): + super().__init__(value) + self._name = name + + @property + def lark_name(self) -> str: + return self._name + + @property + def serialize_conversion(self) -> Callable: + return str + + +# explicitly define various kinds of string-based tokens +STRING_CHARS_TOKEN = StringToken +ESCAPED_INTERPOLATION_TOKEN = StringToken +BINARY_OP_TOKEN = StringToken +EQ_TOKEN = StringToken +COLON_TOKEN = StringToken +LPAR_TOKEN = StringToken # ( +RPAR_TOKEN = StringToken # ) +LBRACE_TOKEN = StringToken # { +RBRACE_TOKEN = StringToken # } +DOT_TOKEN = StringToken +COMMA_TOKEN = StringToken +QMARK_TOKEN = StringToken +LSQB_TOKEN = StringToken # [ +RSQB_TOKEN = StringToken # ] +INTERP_START_TOKEN = StringToken # ${ +DBLQUOTE_TOKEN = StringToken # " + + +class IdentifierToken(LarkToken): + @property + def lark_name(self) -> str: + return "IDENTIFIER" + + @property + def serialize_conversion(self) -> Callable: + return str + + +class IntToken(LarkToken): + @property + def lark_name(self) -> str: + return "INT_LITERAL" + + @property + def serialize_conversion(self) -> Callable: + return int + + +class FloatToken(LarkToken): + @property + def lark_name(self) -> str: + return "FLOAT_LITERAL" + + @property + def serialize_conversion(self) -> Callable: + return float diff --git a/hcl2/rule_transformer/rules/tree.py b/hcl2/rule_transformer/rules/tree.py new file mode 100644 index 00000000..e39d2077 --- /dev/null +++ b/hcl2/rule_transformer/rules/tree.py @@ -0,0 +1,106 @@ +from abc import ABC, abstractmethod +from typing import List, Optional, Any, Union + + +class LarkNode(ABC): + """Base class for all nodes in the tree""" + + def __init__(self, index: int = -1, parent: Optional["Node"] = None): + self._index = index + self._parent = parent + + @property + def parent(self) -> Optional["Node"]: + return self._parent + + @property + def index(self) -> int: + 
return self._index + + def set_parent(self, parent: "Node"): + self._parent = parent + + def set_index(self, index: int): + self._index = index + + @abstractmethod + def serialize(self, options=None) -> Any: + pass + + @abstractmethod + def to_lark(self) -> Any: + """Convert back to Lark representation""" + pass + + def is_leaf(self) -> bool: + """Check if this is a leaf node (atomic token)""" + return isinstance(self, LeafNode) + + def is_sequence(self) -> bool: + """Check if this is a token sequence node""" + return isinstance(self, SequenceNode) + + def is_internal(self) -> bool: + """Check if this is an internal node (grammar rule)""" + return isinstance(self, InternalNode) + + def is_atomic(self) -> bool: + """Check if this represents an atomic value (leaf or sequence)""" + return self.is_leaf() or self.is_sequence() + + +class LarkLeaf(Node, ABC): + """""" + + def __init__(self, value: Any, index: int = -1, parent: Optional[TreeNode] = None): + super().__init__(index, parent) + self._value = value + + @property + def value(self) -> Any: + return self._value + + def serialize(self, options=None) -> Any: + return self._value + + +class InternalNode(Node): + def __init__( + self, children: List[Node], index: int = -1, parent: Optional[Node] = None + ): + super().__init__(index, parent) + self._children = children or [] + + # Set parent and index for all children + for i, child in enumerate(self._children): + if child is not None: + child.set_parent(self) + child.set_index(i) + + @property + def children(self) -> List[Node]: + return self._children + + def add_child(self, child: Node): + """Add a child to this internal node""" + child.set_parent(self) + child.set_index(len(self._children)) + self._children.append(child) + + def remove_child(self, index: int) -> Optional[Node]: + """Remove child at given index""" + if 0 <= index < len(self._children): + child = self._children.pop(index) + if child: + child.set_parent(None) + # Update indices for remaining children 
+ for i in range(index, len(self._children)): + if self._children[i]: + self._children[i].set_index(i) + return child + return None + + @abstractmethod + def rule_name(self) -> str: + """The name of the grammar rule this represents""" + pass diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index 96fe7c91..65d5dd9c 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -1,19 +1,19 @@ -from typing import Optional, List, Any +from abc import ABC +from typing import Optional, List, Any, Tuple -from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule +from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule +from hcl2.rule_transformer.rules.literal_rules import TokenRule from hcl2.rule_transformer.utils import SerializationOptions -class NewLineOrCommentRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: +class NewLineOrCommentRule(TokenRule): + @property + def lark_name(self) -> str: return "new_line_or_comment" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return TokenSequence(self._children).serialize(options) + @classmethod + def from_string(cls, string: str) -> "NewLineOrCommentRule": + return cls([LarkToken("NL_OR_COMMENT", string)]) def to_list( self, options: SerializationOptions = SerializationOptions() @@ -40,3 +40,29 @@ def to_list( result.append(comment.strip()) return result + + +class InlineCommentMixIn(LarkRule, ABC): + def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + for index in indexes: + try: + child = children[index] + except IndexError: + children.insert(index, None) + else: + if not isinstance(child, NewLineOrCommentRule): + children.insert(index, None) + + def inline_comments(self): + result = [] + for child in self._children: + + if isinstance(child, NewLineOrCommentRule): + comments = 
child.to_list() + if comments is not None: + result.extend(comments) + + elif isinstance(child, InlineCommentMixIn): + result.extend(child.inline_comments()) + + return result diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 1c7d6157..31e88d61 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -1,30 +1,45 @@ # pylint: disable=missing-function-docstring,unused-argument from typing import List, Union -from lark import Transformer, Tree, Token +from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta -from lark.visitors import _Leaf_T, Discard, v_args -from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule from hcl2.rule_transformer.rules.base import ( StartRule, BodyRule, BlockRule, AttributeRule, ) +from hcl2.rule_transformer.rules.containers import ( + ObjectRule, + ObjectElemRule, + ObjectElemKeyRule, +) from hcl2.rule_transformer.rules.expression import ( BinaryTermRule, - ConditionalRule, - ExprTermRule, - BinaryOpRule, UnaryOpRule, + BinaryOpRule, + ExprTermRule, + ConditionalRule, ) -from hcl2.rule_transformer.rules.token_sequence import ( - IdentifierRule, - IntLitRule, +from hcl2.rule_transformer.rules.indexing import ( + IndexExprTermRule, + SqbIndex, + ShortIndexRule, +) +from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, - StringLitRule, + IntLitRule, + IdentifierRule, BinaryOperatorRule, + StringPartRule, +) +from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule +from hcl2.rule_transformer.rules.tokens import ( + IdentifierToken, + StringToken, + IntToken, + FloatToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -36,18 +51,24 @@ class RuleTransformer(Transformer): with_meta: bool - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} + def transform(self, tree: Tree) -> StartRule: + return 
super().transform(tree) - def __init__(self): + def __init__(self, discard_new_line_or_comments: bool = False): super().__init__() + self.discard_new_line_or_comments = discard_new_line_or_comments - def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: - return super().transform(tree) + def __default_token__(self, token: Token) -> StringToken: + return StringToken(token.type, token.value) + + def IDENTIFIER(self, token: Token) -> IdentifierToken: + return IdentifierToken(token.value) - def __default_token__(self, token: Token) -> LarkToken: - return LarkToken(token.type, token.value) + def INT_LITERAL(self, token: Token) -> IntToken: + return IntToken(token.value) + + def FLOAT_LITERAL(self, token: Token) -> FloatToken: + return FloatToken(token.value) @v_args(meta=True) def start(self, meta: Meta, args) -> StartRule: @@ -61,6 +82,16 @@ def body(self, meta: Meta, args) -> BodyRule: def block(self, meta: Meta, args) -> BlockRule: return BlockRule(args, meta) + @v_args(meta=True) + def attribute(self, meta: Meta, args) -> AttributeRule: + return AttributeRule(args, meta) + + @v_args(meta=True) + def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + if self.discard_new_line_or_comments: + return Discard + return NewLineOrCommentRule(args, meta) + @v_args(meta=True) def identifier(self, meta: Meta, args) -> IdentifierRule: return IdentifierRule(args, meta) @@ -74,8 +105,16 @@ def float_lit(self, meta: Meta, args) -> FloatLitRule: return FloatLitRule(args, meta) @v_args(meta=True) - def string_lit(self, meta: Meta, args) -> StringLitRule: - return StringLitRule(args, meta) + def string(self, meta: Meta, args) -> StringRule: + return StringRule(args, meta) + + @v_args(meta=True) + def string_part(self, meta: Meta, args) -> StringPartRule: + return StringPartRule(args, meta) + + @v_args(meta=True) + def interpolation(self, meta: Meta, args) -> InterpolationRule: + return InterpolationRule(args, meta) @v_args(meta=True) def expr_term(self, meta: Meta, 
args) -> ExprTermRule: @@ -102,9 +141,25 @@ def binary_op(self, meta: Meta, args) -> BinaryOpRule: return BinaryOpRule(args, meta) @v_args(meta=True) - def attribute(self, meta: Meta, args) -> AttributeRule: - return AttributeRule(args, meta) + def object(self, meta: Meta, args) -> ObjectRule: + return ObjectRule(args, meta) @v_args(meta=True) - def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: - return NewLineOrCommentRule(args, meta) + def object_elem(self, meta: Meta, args) -> ObjectElemRule: + return ObjectElemRule(args, meta) + + @v_args(meta=True) + def object_elem_key(self, meta: Meta, args) -> ObjectElemKeyRule: + return ObjectElemKeyRule(args, meta) + + @v_args(meta=True) + def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: + return IndexExprTermRule(args, meta) + + @v_args(meta=True) + def braces_index(self, meta: Meta, args) -> SqbIndex: + return SqbIndex(args, meta) + + @v_args(meta=True) + def short_index(self, meta: Meta, args) -> ShortIndexRule: + return ShortIndexRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index e083d628..6a6ed661 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,11 +1,15 @@ -from dataclasses import dataclass +from dataclasses import dataclass, replace @dataclass class SerializationOptions: with_comments: bool = True with_meta: bool = False - + unwrap_dollar_string: bool = False + + def replace(self, **kwargs) -> "SerializationOptions": + return replace(self, **kwargs) + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): From 5a10fece33cf401c4e2b23a1655e983c3c708e55 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 23 Jul 2025 11:48:44 +0200 Subject: [PATCH 06/42] batch of changes --- hcl2/parser.py | 2 +- hcl2/reconstructor.py | 7 +- hcl2/rule_transformer/deserializer.py | 31 +++ hcl2/rule_transformer/hcl2.lark | 25 +- hcl2/rule_transformer/rules/abstract.py | 36 ++- 
hcl2/rule_transformer/rules/base.py | 61 +++-- hcl2/rule_transformer/rules/containers.py | 165 ++++++++++++-- .../rules/{expression.py => expressions.py} | 135 ++++++----- hcl2/rule_transformer/rules/functions.py | 104 +++++++++ hcl2/rule_transformer/rules/indexing.py | 215 ++++++++++++++++-- hcl2/rule_transformer/rules/literal_rules.py | 34 +-- hcl2/rule_transformer/rules/strings.py | 42 ++-- hcl2/rule_transformer/rules/token_sequence.py | 56 ----- hcl2/rule_transformer/rules/tokens.py | 99 ++++---- hcl2/rule_transformer/rules/whitespace.py | 4 +- hcl2/rule_transformer/transformer.py | 90 ++++++-- hcl2/rule_transformer/utils.py | 41 +++- 17 files changed, 835 insertions(+), 312 deletions(-) create mode 100644 hcl2/rule_transformer/deserializer.py rename hcl2/rule_transformer/rules/{expression.py => expressions.py} (53%) create mode 100644 hcl2/rule_transformer/rules/functions.py delete mode 100644 hcl2/rule_transformer/rules/token_sequence.py diff --git a/hcl2/parser.py b/hcl2/parser.py index a0c87e34..3e524736 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,7 +12,7 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "rule_transformer/hcl2.lark.lark", + "rule_transformer/hcl2.lark", parser="lalr", cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 7f957d7b..555edcf6 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -167,12 +167,17 @@ def _should_add_space(self, rule, current_terminal, is_block_label: bool = False if self._is_equals_sign(current_terminal): return True + if is_block_label: + pass + # print(rule, self._last_rule, current_terminal, self._last_terminal) + if is_block_label and isinstance(rule, Token) and rule.value == "string": if ( current_terminal == self._last_terminal == Terminal("DBLQUOTE") or current_terminal == Terminal("DBLQUOTE") - and 
self._last_terminal == Terminal("NAME") + and self._last_terminal == Terminal("IDENTIFIER") ): + # print("true") return True # if we're in a ternary or binary operator, add space around the operator diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py new file mode 100644 index 00000000..5bdcf775 --- /dev/null +++ b/hcl2/rule_transformer/deserializer.py @@ -0,0 +1,31 @@ +import json +from typing import Any, TextIO, List + +from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule +from hcl2.rule_transformer.utils import DeserializationOptions + + +class Deserializer: + def __init__(self, options=DeserializationOptions()): + self.options = options + + def load_python(self, value: Any) -> LarkElement: + pass + + def loads(self, value: str) -> LarkElement: + return self.load_python(json.loads(value)) + + def load(self, file: TextIO) -> LarkElement: + return self.loads(file.read()) + + def _deserialize(self, value: Any) -> LarkElement: + pass + + def _deserialize_dict(self, value: dict) -> LarkRule: + pass + + def _deserialize_list(self, value: List) -> LarkRule: + pass + + def _deserialize_expression(self, value: str) -> LarkRule: + pass diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index a7722118..3f8d913e 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -11,11 +11,9 @@ IN : "in" FOR : "for" FOR_EACH : "for_each" -// Identifiers and Names -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ -IDENTIFIER: NAME | IN | FOR | IF | FOR_EACH // Literals +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ DECIMAL : "0".."9" @@ -91,7 +89,8 @@ block : identifier (identifier | string)* new_line_or_comment? 
LBRACE body RBRAC new_line_or_comment: ( NL_OR_COMMENT )+ // Basic literals and identifiers -identifier: IDENTIFIER +identifier : NAME +keyword: IN | FOR | IF | FOR_EACH int_lit: INT_LITERAL float_lit: FLOAT_LITERAL string: DBLQUOTE string_part* DBLQUOTE @@ -118,21 +117,20 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | string | tuple | object - | function_call - | index_expr_term - | get_attr_expr_term | identifier - | provider_function_call + | function_call | heredoc_template | heredoc_template_trim + | index_expr_term + | get_attr_expr_term | attr_splat_expr_term | full_splat_expr_term | for_tuple_expr | for_object_expr // Collections -tuple : LSQB (new_line_or_comment* expression new_line_or_comment* COMMA)* (new_line_or_comment* expression)? new_line_or_comment* RSQB -object : LBRACE new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* RBRACE +tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB +object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? COMMA)) new_line_or_comment?)* RBRACE object_elem : object_elem_key ( EQ | COLON ) expression object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression object_elem_key_expression : LPAR expression RPAR @@ -143,9 +141,8 @@ heredoc_template : HEREDOC_TEMPLATE heredoc_template_trim : HEREDOC_TEMPLATE_TRIM // Functions -function_call : identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR -arguments : (expression (new_line_or_comment* COMMA new_line_or_comment* expression)* (COMMA | ELLIPSIS)? new_line_or_comment*) -provider_function_call: identifier COLONS identifier COLONS identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +function_call : identifier (COLONS identifier COLONS identifier)? 
LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) // Indexing and attribute access index_expr_term : expr_term index @@ -156,7 +153,7 @@ full_splat_expr_term : expr_term full_splat braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB short_index : DOT INT_LITERAL get_attr : DOT identifier -attr_splat : ATTR_SPLAT get_attr* +attr_splat : ATTR_SPLAT (get_attr | index)* full_splat : FULL_SPLAT_START (get_attr | index)* // For expressions diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index d3a3b634..e32d9ddb 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -2,15 +2,16 @@ from typing import Any, Union, List, Optional, Tuple, Callable from lark import Token, Tree +from lark.exceptions import VisitError from lark.tree import Meta -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class LarkElement(ABC): - @property + @staticmethod @abstractmethod - def lark_name(self) -> str: + def lark_name() -> str: raise NotImplementedError() def __init__(self, index: int = -1, parent: "LarkElement" = None): @@ -28,7 +29,9 @@ def to_lark(self) -> Any: raise NotImplementedError() @abstractmethod - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: raise NotImplementedError() @@ -37,11 +40,6 @@ def __init__(self, value: Union[str, int]): self._value = value super().__init__() - @property - @abstractmethod - def lark_name(self) -> str: - raise NotImplementedError() - @property @abstractmethod def serialize_conversion(self) -> Callable: @@ -51,27 +49,26 @@ def serialize_conversion(self) -> Callable: 
def value(self): return self._value - def serialize(self, options: SerializationOptions = SerializationOptions()): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.serialize_conversion(self.value) def to_lark(self) -> Token: - return Token(self.lark_name, self.value) + return Token(self.lark_name(), self.value) def __str__(self) -> str: return str(self._value) def __repr__(self) -> str: - return f"" + return f"" class LarkRule(LarkElement, ABC): - @property - @abstractmethod - def lark_name(self) -> str: - raise NotImplementedError() - @abstractmethod - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: raise NotImplementedError() @property @@ -94,7 +91,7 @@ def to_lark(self) -> Tree: result_children.append(child.to_lark()) - return Tree(self.lark_name, result_children, meta=self._meta) + return Tree(self.lark_name(), result_children, meta=self._meta) def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): super().__init__() @@ -103,6 +100,7 @@ def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): for index, child in enumerate(children): if child is not None: + print(child) child.set_index(index) child.set_parent(self) diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 6d0c4924..da74954b 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,34 +3,37 @@ from lark.tree import Meta +from hcl2.dict_transformer import START_LINE, END_LINE from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expression import Expression -from hcl2.rule_transformer.rules.tokens import IdentifierToken, EQ_TOKEN +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.tokens 
import NAME, EQ from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class AttributeRule(LarkRule): _children: Tuple[ - IdentifierToken, - EQ_TOKEN, - Expression, + NAME, + EQ, + ExpressionRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "attribute" @property - def identifier(self) -> IdentifierToken: + def identifier(self) -> NAME: return self._children[0] @property - def expression(self) -> Expression: + def expression(self) -> ExpressionRule: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return {self.identifier.serialize(options): self.expression.serialize(options)} @@ -44,11 +47,13 @@ class BodyRule(LarkRule): ] ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "body" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: blocks: List[BlockRule] = [] attributes: List[AttributeRule] = [] comments = [] @@ -99,11 +104,13 @@ class StartRule(LarkRule): def body(self) -> BodyRule: return self._children[0] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "start" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.body.serialize(options) @@ -118,12 +125,12 @@ def __init__(self, children, meta: Optional[Meta] = None): child for child in children if not isinstance(child, LarkToken) ] - @property - def lark_name(self) -> str: + @staticmethod + 
def lark_name() -> str: return "block" @property - def labels(self) -> List[IdentifierToken]: + def labels(self) -> List[NAME]: return list(filter(lambda label: label is not None, self._labels)) @property @@ -131,10 +138,18 @@ def body(self) -> BodyRule: return self._body def serialize( - self, options: SerializationOptions = SerializationOptions() - ) -> BodyRule: + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: result = self._body.serialize(options) labels = self._labels - for label in reversed(labels): + for label in reversed(labels[1:]): result = {label.serialize(options): result} + + result.update( + { + START_LINE: self._meta.line, + END_LINE: self._meta.end_line, + } + ) + return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index c39f3ba2..11ac0f5e 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -1,7 +1,8 @@ +import json from typing import Tuple, List, Optional, Union, Any from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, IntLitRule, @@ -9,36 +10,135 @@ ) from hcl2.rule_transformer.rules.strings import StringRule from hcl2.rule_transformer.rules.tokens import ( - COLON_TOKEN, - EQ_TOKEN, - LBRACE_TOKEN, - COMMA_TOKEN, - RBRACE_TOKEN, + COLON, + EQ, + LBRACE, + COMMA, + RBRACE, LSQB, RSQB, LPAR, RPAR, DOT, ) from hcl2.rule_transformer.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string + + +class TupleRule(InlineCommentMixIn): + + _children: Tuple[ + LSQB, + Optional[NewLineOrCommentRule], + Tuple[ + ExpressionRule, + 
Optional[NewLineOrCommentRule], + COMMA, + Optional[NewLineOrCommentRule], + ... + ], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[COMMA], + Optional[NewLineOrCommentRule], + RSQB, + ] + + @staticmethod + def lark_name() -> str: + return "tuple" + + @property + def elements(self) -> List[ExpressionRule]: + return [ + child for child in self.children[1:-1] if isinstance(child, ExpressionRule) + ] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + if not options.wrap_tuples: + return [element.serialize(options, context) for element in self.elements] + + with context.modify(inside_dollar_string=True): + result = f"[{", ".join( + str(element.serialize(options, context)) for element in self.elements + )}]" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result class ObjectElemKeyRule(LarkRule): - _children: Tuple[Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule]] + + key_T = Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule] + + _children: Tuple[key_T] @staticmethod def lark_name() -> str: return "object_elem_key" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self.children[0].serialize(options) + @property + def value(self) -> key_T: + return self._children[0] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + return self.value.serialize(options, context) + + +class ObjectElemKeyExpressionRule(LarkRule): + + _children: Tuple[ + LPAR, + ExpressionRule, + RPAR, + ] + + + @staticmethod + def lark_name() -> str: + return "object_elem_key_expression" + + @property + def expression(self) -> ExpressionRule: + return self._children[1] + + def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + with context.modify(inside_dollar_string=True): + result = f"({self.expression.serialize(options, context)})" + if not 
context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class ObjectElemKeyDotAccessor(LarkRule): + + _children: Tuple[ + IdentifierRule, + Tuple[ + IdentifierRule, + DOT, + ... + ] + ] + + @staticmethod + def lark_name() -> str: + return "object_elem_key_dot_accessor" + + @property + def identifiers(self) -> List[IdentifierRule]: + return [child for child in self._children if isinstance(child, IdentifierRule)] + + def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + return ".".join(identifier.serialize(options, context) for identifier in self.identifiers) class ObjectElemRule(LarkRule): _children: Tuple[ ObjectElemKeyRule, - Union[EQ_TOKEN, COLON_TOKEN], - Expression, + Union[EQ, COLON], + ExpressionRule, ] @staticmethod @@ -47,25 +147,31 @@ def lark_name() -> str: @property def key(self) -> ObjectElemKeyRule: - return self.children[0] + return self._children[0] @property def expression(self): - return self.children[2] + return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: return { - self.children[0].serialize(options): self.children[2].serialize(options) + self.key.serialize(options, context): self.expression.serialize(options, context) } class ObjectRule(InlineCommentMixIn): _children: Tuple[ - LBRACE_TOKEN, + LBRACE, Optional[NewLineOrCommentRule], - Tuple[Union[ObjectElemRule, Optional[COMMA_TOKEN], NewLineOrCommentRule], ...], - RBRACE_TOKEN, + Tuple[ + ObjectElemRule, + Optional[NewLineOrCommentRule], + Optional[COMMA], + Optional[NewLineOrCommentRule], + ... 
+ ], + RBRACE, ] @staticmethod @@ -78,8 +184,21 @@ def elements(self) -> List[ObjectElemRule]: child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) ] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - result = {} - for element in self.elements: - result.update(element.serialize()) + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + if not options.wrap_objects: + result = {} + for element in self.elements: + result.update(element.serialize(options, context)) + + return result + + with context.modify(inside_dollar_string=True): + result = f"{{{", ".join( + f"{element.key.serialize(options, context)} = {element.expression.serialize(options,context)}" + for element in self.elements + )}}}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expressions.py similarity index 53% rename from hcl2/rule_transformer/rules/expression.py rename to hcl2/rule_transformer/rules/expressions.py index 8a03f813..d89f3b3c 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -8,7 +8,7 @@ LarkToken, ) from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule -from hcl2.rule_transformer.rules.tokens import LPAR_TOKEN, RPAR_TOKEN, QMARK_TOKEN, COLON_TOKEN +from hcl2.rule_transformer.rules.tokens import LPAR, RPAR, QMARK, COLON from hcl2.rule_transformer.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, @@ -18,46 +18,46 @@ to_dollar_string, unwrap_dollar_string, SerializationOptions, + SerializationContext, ) -class Expression(InlineCommentMixIn, ABC): - @property - def lark_name(self) -> str: +class ExpressionRule(InlineCommentMixIn, ABC): + @staticmethod + def lark_name() -> str: return "expression" def __init__(self, children, meta: Optional[Meta] = None): 
super().__init__(children, meta) -class ExprTermRule(Expression): +class ExprTermRule(ExpressionRule): type_ = Tuple[ - Optional[LPAR_TOKEN], + Optional[LPAR], Optional[NewLineOrCommentRule], - Expression, + ExpressionRule, Optional[NewLineOrCommentRule], - Optional[RPAR_TOKEN], + Optional[RPAR], ] _children: type_ - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = False if ( isinstance(children[0], LarkToken) - and children[0].lark_name == "LPAR" + and children[0].lark_name() == "LPAR" and isinstance(children[-1], LarkToken) - and children[-1].lark_name == "RPAR" + and children[-1].lark_name() == "RPAR" ): self._parentheses = True else: children = [None, *children, None] - self._possibly_insert_null_comments(children, [1, 3]) super().__init__(children, meta) @@ -66,35 +66,37 @@ def parentheses(self) -> bool: return self._parentheses @property - def expression(self) -> Expression: + def expression(self) -> ExpressionRule: return self._children[2] - def serialize(self , unwrap: bool = False, options: SerializationOptions = SerializationOptions()) -> Any: - result = self.expression.serialize(options) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = self.expression.serialize(options, context) + if self.parentheses: result = wrap_into_parentheses(result) - result = to_dollar_string(result) - - if options.unwrap_dollar_string: - result = unwrap_dollar_string(result) + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class ConditionalRule(Expression): +class ConditionalRule(ExpressionRule): _children: Tuple[ - Expression, - QMARK_TOKEN, + ExpressionRule, + QMARK, Optional[NewLineOrCommentRule], - Expression, + ExpressionRule, Optional[NewLineOrCommentRule], - COLON_TOKEN, + COLON, Optional[NewLineOrCommentRule], - Expression, + 
ExpressionRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): @@ -102,25 +104,34 @@ def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) @property - def condition(self) -> Expression: + def condition(self) -> ExpressionRule: return self._children[0] @property - def if_true(self) -> Expression: + def if_true(self) -> ExpressionRule: return self._children[3] @property - def if_false(self) -> Expression: + def if_false(self) -> ExpressionRule: return self._children[7] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - options = options.replace(unwrap_dollar_string=True) - print(self.condition) - result = f"{self.condition.serialize(options)} ? {self.if_true.serialize(options)} : {self.if_false.serialize(options)}" - return to_dollar_string(result) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=False): + result = ( + f"{self.condition.serialize(options, context)} " + f"? 
{self.if_true.serialize(options, context)} " + f": {self.if_false.serialize(options, context)}" + ) + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class BinaryTermRule(Expression): + +class BinaryTermRule(ExpressionRule): _children: Tuple[ BinaryOperatorRule, @@ -128,8 +139,8 @@ class BinaryTermRule(Expression): ExprTermRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -144,19 +155,21 @@ def binary_operator(self) -> BinaryOperatorRule: def expr_term(self) -> ExprTermRule: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return f"{self.binary_operator.serialize(options)} {self.expr_term.serialize(options)}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return f"{self.binary_operator.serialize(options, context)} {self.expr_term.serialize(options, context)}" -class BinaryOpRule(Expression): +class BinaryOpRule(ExpressionRule): _children: Tuple[ ExprTermRule, BinaryTermRule, Optional[NewLineOrCommentRule], ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_op" @property @@ -167,24 +180,28 @@ def expr_term(self) -> ExprTermRule: def binary_term(self) -> BinaryTermRule: return self._children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - children_options = options.replace(unwrap_dollar_string=True) - lhs = self.expr_term.serialize(children_options) - operator = self.binary_term.binary_operator.serialize(children_options) - rhs = self.binary_term.expr_term.serialize(children_options) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + with context.modify(inside_dollar_string=True): + lhs = self.expr_term.serialize(options, context) + operator 
= self.binary_term.binary_operator.serialize(options, context) + rhs = self.binary_term.expr_term.serialize(options, context) result = f"{lhs} {operator} {rhs}" - if options.unwrap_dollar_string: - return result - return to_dollar_string(result) + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class UnaryOpRule(Expression): + +class UnaryOpRule(ExpressionRule): _children: Tuple[LarkToken, ExprTermRule] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "unary_op" @property @@ -195,5 +212,9 @@ def operator(self) -> str: def expr_term(self): return self._children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return to_dollar_string(f"{self.operator}{self.expr_term.serialize(options)}") + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return to_dollar_string( + f"{self.operator}{self.expr_term.serialize(options, context)}" + ) diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py new file mode 100644 index 00000000..412a1667 --- /dev/null +++ b/hcl2/rule_transformer/rules/functions.py @@ -0,0 +1,104 @@ +from functools import lru_cache +from typing import Any, Optional, Tuple, Union, List + +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR +from hcl2.rule_transformer.rules.whitespace import InlineCommentMixIn, NewLineOrCommentRule +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string + + +class ArgumentsRule(InlineCommentMixIn): + + _children: Tuple[ + ExpressionRule, + Tuple[ + Optional[NewLineOrCommentRule], + COMMA, + Optional[NewLineOrCommentRule], + ExpressionRule, + ... 
+ ], + Optional[Union[COMMA, ELLIPSIS]], + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "arguments" + + @property + @lru_cache(maxsize=None) + def has_ellipsis(self) -> bool: + for child in self._children[-2:]: + if isinstance(child, StringToken) and child.lark_name() == "ELLIPSIS": + return True + return False + + @property + def arguments(self) -> List[ExpressionRule]: + return [child for child in self._children if isinstance(child, ExpressionRule)] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + result = ", ".join([argument.serialize(options, context) for argument in self.arguments]) + if self.has_ellipsis: + result += " ..." + return result + + +class FunctionCallRule(InlineCommentMixIn): + + _children: Tuple[ + IdentifierRule, + Optional[IdentifierRule], + Optional[IdentifierRule], + LPAR, + Optional[NewLineOrCommentRule], + Optional[ArgumentsRule], + Optional[NewLineOrCommentRule], + RPAR, + ] + + @staticmethod + def lark_name() -> str: + return "function_call" + + @property + @lru_cache(maxsize=None) + def identifiers(self) -> List[IdentifierRule]: + return [child for child in self._children if isinstance(child, IdentifierRule)] + + @property + @lru_cache(maxsize=None) + def arguments(self) -> Optional[ArgumentsRule]: + for child in self._children[2:6]: + if isinstance(child, ArgumentsRule): + return child + + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + result = ( + f"{":".join(identifier.serialize(options, context) for identifier in self.identifiers)}" + f"({self.arguments.serialize(options, context) if self.arguments else ""})" + ) + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result + + +# class ProviderFunctionCallRule(FunctionCallRule): +# _children: Tuple[ +# IdentifierRule, +# IdentifierRule, +# IdentifierRule, +# LPAR, +# Optional[NewLineOrCommentRule], +# 
Optional[ArgumentsRule], +# Optional[NewLineOrCommentRule], +# RPAR, +# ] +# +# @staticmethod +# def lark_name() -> str: +# return "provider_function_call" diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py index ce23d040..7a9b53a5 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rule_transformer/rules/indexing.py @@ -1,59 +1,69 @@ -from typing import List, Optional, Tuple, Any +from typing import List, Optional, Tuple, Any, Union from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import ExprTermRule, Expression +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.tokens import ( - DOT_TOKEN, - IntToken, - LSQB_TOKEN, - RSQB_TOKEN, + DOT, + IntLiteral, + LSQB, + RSQB, + ATTR_SPLAT, ) from hcl2.rule_transformer.rules.whitespace import ( InlineCommentMixIn, NewLineOrCommentRule, ) -from hcl2.rule_transformer.utils import SerializationOptions, to_dollar_string +from hcl2.rule_transformer.utils import ( + SerializationOptions, + to_dollar_string, + SerializationContext, +) class ShortIndexRule(LarkRule): _children: Tuple[ - DOT_TOKEN, - IntToken, + DOT, + IntLiteral, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "short_index" @property def index(self): return self.children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return f".{self.index.serialize(options)}" -class SqbIndex(InlineCommentMixIn): +class SqbIndexRule(InlineCommentMixIn): _children: Tuple[ - LSQB_TOKEN, + LSQB, Optional[NewLineOrCommentRule], ExprTermRule, Optional[NewLineOrCommentRule], - RSQB_TOKEN, + RSQB, ] - @property - def lark_name(self) -> str: 
+ @staticmethod + def lark_name() -> str: return "braces_index" @property def index_expression(self): return self.children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return f"[{self.index_expression.serialize(options)}]" def __init__(self, children, meta: Optional[Meta] = None): @@ -61,15 +71,170 @@ def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) -class IndexExprTermRule(Expression): +class IndexExprTermRule(ExpressionRule): - _children: Tuple[ExprTermRule, SqbIndex] + _children: Tuple[ExprTermRule, SqbIndexRule] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "index_expr_term" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return to_dollar_string( - f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class GetAttrRule(LarkRule): + + _children: Tuple[ + DOT, + IdentifierRule, + ] + + @staticmethod + def lark_name() -> str: + return "get_attr" + + @property + def identifier(self) -> IdentifierRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return f".{self.identifier.serialize(options, context)}" + + +class GetAttrExprTermRule(ExpressionRule): + + _children: Tuple[ + ExprTermRule, + GetAttrRule, + ] + + @staticmethod + def lark_name() -> str: + return "get_attr_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + 
@property + def get_attr(self) -> GetAttrRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.get_attr.serialize(options, context)}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class AttrSplatRule(LarkRule): + _children: Tuple[ + ATTR_SPLAT, + Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], + ] + + @staticmethod + def lark_name() -> str: + return "attr_splat" + + @property + def get_attrs( + self, + ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + return self._children[1:] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return ".*" + "".join( + get_attr.serialize(options, context) for get_attr in self.get_attrs + ) + + +class AttrSplatExprTermRule(ExpressionRule): + + _children: Tuple[ExprTermRule, AttrSplatRule] + + @staticmethod + def lark_name() -> str: + return "attr_splat_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def attr_splat(self) -> AttrSplatRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class FullSplatRule(LarkRule): + _children: Tuple[ + ATTR_SPLAT, + Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], + ] + + @staticmethod + def lark_name() -> str: + return "full_splat" + + @property + def get_attrs( + self, + ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + return 
self._children[1:] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return "[*]" + "".join( + get_attr.serialize(options, context) for get_attr in self.get_attrs ) + + +class FullSplatExprTermRule(ExpressionRule): + _children: Tuple[ExprTermRule, FullSplatRule] + + @staticmethod + def lark_name() -> str: + return "full_splat_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def attr_splat(self) -> FullSplatRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py index 06ca99ae..db7e8289 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rule_transformer/rules/literal_rules.py @@ -2,7 +2,7 @@ from typing import Any, Tuple from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class TokenRule(LarkRule, ABC): @@ -13,35 +13,43 @@ class TokenRule(LarkRule, ABC): def token(self) -> LarkToken: return self._children[0] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.token.serialize() +class KeywordRule(TokenRule): + @staticmethod + def lark_name() -> str: + return "keyword" + + class IdentifierRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "identifier" class 
IntLitRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "int_lit" class FloatLitRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "float_lit" class StringPartRule(TokenRule): - @property - def lark_name(self) -> str: - return "string" + @staticmethod + def lark_name() -> str: + return "string_part" class BinaryOperatorRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_operator" diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index 0f53c55a..dc3b85b0 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -3,48 +3,56 @@ from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import Expression, ExprTermRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule from hcl2.rule_transformer.rules.literal_rules import StringPartRule from hcl2.rule_transformer.rules.tokens import ( - INTERP_START_TOKEN, - RBRACE_TOKEN, - DBLQUOTE_TOKEN, - STRING_CHARS_TOKEN, + INTERP_START, + RBRACE, + DBLQUOTE, + STRING_CHARS, +) +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, ) -from hcl2.rule_transformer.utils import SerializationOptions class StringRule(LarkRule): - _children: Tuple[DBLQUOTE_TOKEN, List[StringPartRule], DBLQUOTE_TOKEN] + _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "string" @property def string_parts(self): return self.children[1:-1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return '"' + 
"".join(part.serialize() for part in self.string_parts) + '"' class InterpolationRule(LarkRule): _children: Tuple[ - INTERP_START_TOKEN, - Expression, - RBRACE_TOKEN, + INTERP_START, + ExpressionRule, + RBRACE, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "interpolation" @property def expression(self): return self.children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return "${" + self.expression.serialize(options) + "}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return to_dollar_string(self.expression.serialize(options)) diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py deleted file mode 100644 index 66d780b3..00000000 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ /dev/null @@ -1,56 +0,0 @@ -# from abc import ABC -# from typing import Tuple, Any, List, Optional, Type -# -# from lark.tree import Meta -# -# from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -# from hcl2.rule_transformer.utils import SerializationOptions -# -# -# class TokenSequenceRule(LarkRule, ABC): -# -# _children: Tuple[TokenSequence] -# -# def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): -# children = [TokenSequence(children)] -# super().__init__(children, meta) -# -# def serialized_type(self) -> Type: -# return str -# -# def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: -# return self.serialized_type()(self._children[0].serialize(options)) -# -# -# class IdentifierRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# return "identifier" -# -# -# class IntLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# return "int_lit" -# -# def serialized_type(self) -> Type: -# return int -# -# -# class FloatLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() 
-> str: -# return "float_lit" -# -# def serialized_type(self) -> Type: -# return float -# -# -# class StringLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; -# # nevertheless, try to change it to a rule in the grammar -# return "STRING_LIT" -# -# diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 18e1ac07..7dd79f63 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -1,54 +1,67 @@ -from typing import Callable, Any +from functools import lru_cache +from typing import Callable, Any, Type from hcl2.rule_transformer.rules.abstract import LarkToken class StringToken(LarkToken): - def __init__(self, name: str, value: Any): + """ + Single run-time base class; every `StringToken["..."]` call returns a + cached subclass whose static `lark_name()` yields the given string. + """ + + @staticmethod + @lru_cache(maxsize=None) + def __build_subclass(name: str) -> Type["StringToken"]: + """Create a subclass with a constant `lark_name`.""" + return type( # type: ignore + f"{name}_TOKEN", + (StringToken,), + { + "__slots__": (), + "lark_name": staticmethod(lambda _n=name: _n), + }, + ) + + def __class_getitem__(cls, name: str) -> Type["StringToken"]: + if not isinstance(name, str): + raise TypeError("StringToken[...] 
expects a single str argument") + return cls.__build_subclass(name) + + def __init__(self, value: Any) -> None: super().__init__(value) - self._name = name @property - def lark_name(self) -> str: - return self._name - - @property - def serialize_conversion(self) -> Callable: - return str - - -# explicitly define various kinds of string-based tokens -STRING_CHARS_TOKEN = StringToken -ESCAPED_INTERPOLATION_TOKEN = StringToken -BINARY_OP_TOKEN = StringToken -EQ_TOKEN = StringToken -COLON_TOKEN = StringToken -LPAR_TOKEN = StringToken # ( -RPAR_TOKEN = StringToken # ) -LBRACE_TOKEN = StringToken # { -RBRACE_TOKEN = StringToken # } -DOT_TOKEN = StringToken -COMMA_TOKEN = StringToken -QMARK_TOKEN = StringToken -LSQB_TOKEN = StringToken # [ -RSQB_TOKEN = StringToken # ] -INTERP_START_TOKEN = StringToken # ${ -DBLQUOTE_TOKEN = StringToken # " - - -class IdentifierToken(LarkToken): - @property - def lark_name(self) -> str: - return "IDENTIFIER" - - @property - def serialize_conversion(self) -> Callable: + def serialize_conversion(self) -> Callable[[Any], str]: return str -class IntToken(LarkToken): - @property - def lark_name(self) -> str: +# explicitly define various kinds of string-based tokens for type hinting +NAME = StringToken["NAME"] +STRING_CHARS = StringToken["STRING_CHARS"] +ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] +BINARY_OP = StringToken["BINARY_OP"] +EQ = StringToken["EQ"] +COLON = StringToken["COLON"] +LPAR = StringToken["LPAR"] +RPAR = StringToken["RPAR"] +LBRACE = StringToken["LBRACE"] +RBRACE = StringToken["RBRACE"] +DOT = StringToken["DOT"] +COMMA = StringToken["COMMA"] +ELLIPSIS = StringToken["ELLIPSIS"] +QMARK = StringToken["QMARK"] +LSQB = StringToken["LSQB"] +RSQB = StringToken["RSQB"] +INTERP_START = StringToken["INTERP_START"] +DBLQUOTE = StringToken["DBLQUOTE"] +ATTR_SPLAT = StringToken["ATTR_SPLAT"] +FULL_SPLAT = StringToken["FULL_SPLAT"] + + +class IntLiteral(LarkToken): + @staticmethod + def lark_name() -> str: return 
"INT_LITERAL" @property @@ -56,9 +69,9 @@ def serialize_conversion(self) -> Callable: return int -class FloatToken(LarkToken): - @property - def lark_name(self) -> str: +class FloatLiteral(LarkToken): + @staticmethod + def lark_name() -> str: return "FLOAT_LITERAL" @property diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index 65d5dd9c..fa24355c 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -7,8 +7,8 @@ class NewLineOrCommentRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "new_line_or_comment" @classmethod diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 31e88d61..41e970d6 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -14,18 +14,28 @@ ObjectRule, ObjectElemRule, ObjectElemKeyRule, + TupleRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, ) -from hcl2.rule_transformer.rules.expression import ( +from hcl2.rule_transformer.rules.expressions import ( BinaryTermRule, UnaryOpRule, BinaryOpRule, ExprTermRule, ConditionalRule, ) +from hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule from hcl2.rule_transformer.rules.indexing import ( IndexExprTermRule, - SqbIndex, + SqbIndexRule, ShortIndexRule, + GetAttrRule, + GetAttrExprTermRule, + AttrSplatExprTermRule, + AttrSplatRule, + FullSplatRule, + FullSplatExprTermRule, ) from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, @@ -36,10 +46,10 @@ ) from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule from hcl2.rule_transformer.rules.tokens import ( - IdentifierToken, + NAME, + IntLiteral, + FloatLiteral, StringToken, - IntToken, - FloatToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -59,16 +69,16 @@ def __init__(self, discard_new_line_or_comments: 
bool = False): self.discard_new_line_or_comments = discard_new_line_or_comments def __default_token__(self, token: Token) -> StringToken: - return StringToken(token.type, token.value) + return StringToken[token.type](token.value) - def IDENTIFIER(self, token: Token) -> IdentifierToken: - return IdentifierToken(token.value) + def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: + return FloatLiteral(token.value) - def INT_LITERAL(self, token: Token) -> IntToken: - return IntToken(token.value) + def NAME(self, token: Token) -> NAME: + return NAME(token.value) - def FLOAT_LITERAL(self, token: Token) -> FloatToken: - return FloatToken(token.value) + def INT_LITERAL(self, token: Token) -> IntLiteral: + return IntLiteral(token.value) @v_args(meta=True) def start(self, meta: Meta, args) -> StartRule: @@ -140,6 +150,10 @@ def unary_op(self, meta: Meta, args) -> UnaryOpRule: def binary_op(self, meta: Meta, args) -> BinaryOpRule: return BinaryOpRule(args, meta) + @v_args(meta=True) + def tuple(self, meta: Meta, args) -> TupleRule: + return TupleRule(args, meta) + @v_args(meta=True) def object(self, meta: Meta, args) -> ObjectRule: return ObjectRule(args, meta) @@ -152,14 +166,62 @@ def object_elem(self, meta: Meta, args) -> ObjectElemRule: def object_elem_key(self, meta: Meta, args) -> ObjectElemKeyRule: return ObjectElemKeyRule(args, meta) + @v_args(meta=True) + def object_elem_key_expression( + self, meta: Meta, args + ) -> ObjectElemKeyExpressionRule: + return ObjectElemKeyExpressionRule(args, meta) + + @v_args(meta=True) + def object_elem_key_dot_accessor( + self, meta: Meta, args + ) -> ObjectElemKeyDotAccessor: + return ObjectElemKeyDotAccessor(args, meta) + + @v_args(meta=True) + def arguments(self, meta: Meta, args) -> ArgumentsRule: + return ArgumentsRule(args, meta) + + @v_args(meta=True) + def function_call(self, meta: Meta, args) -> FunctionCallRule: + return FunctionCallRule(args, meta) + + # @v_args(meta=True) + # def provider_function_call(self, meta: Meta, 
args) -> ProviderFunctionCallRule: + # return ProviderFunctionCallRule(args, meta) + @v_args(meta=True) def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: return IndexExprTermRule(args, meta) @v_args(meta=True) - def braces_index(self, meta: Meta, args) -> SqbIndex: - return SqbIndex(args, meta) + def braces_index(self, meta: Meta, args) -> SqbIndexRule: + return SqbIndexRule(args, meta) @v_args(meta=True) def short_index(self, meta: Meta, args) -> ShortIndexRule: return ShortIndexRule(args, meta) + + @v_args(meta=True) + def get_attr(self, meta: Meta, args) -> GetAttrRule: + return GetAttrRule(args, meta) + + @v_args(meta=True) + def get_attr_expr_term(self, meta: Meta, args) -> GetAttrExprTermRule: + return GetAttrExprTermRule(args, meta) + + @v_args(meta=True) + def attr_splat(self, meta: Meta, args) -> AttrSplatRule: + return AttrSplatRule(args, meta) + + @v_args(meta=True) + def attr_splat_expr_term(self, meta: Meta, args) -> AttrSplatExprTermRule: + return AttrSplatExprTermRule(args, meta) + + @v_args(meta=True) + def full_splat(self, meta: Meta, args) -> FullSplatRule: + return FullSplatRule(args, meta) + + @v_args(meta=True) + def full_splat_expr_term(self, meta: Meta, args) -> FullSplatExprTermRule: + return FullSplatExprTermRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 6a6ed661..8ffeab8b 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,15 +1,48 @@ +from contextlib import contextmanager from dataclasses import dataclass, replace +from typing import Generator @dataclass class SerializationOptions: with_comments: bool = True with_meta: bool = False - unwrap_dollar_string: bool = False - - def replace(self, **kwargs) -> "SerializationOptions": + wrap_objects: bool = False + wrap_tuples: bool = False + + +@dataclass +class DeserializationOptions: + pass + + +@dataclass +class SerializationContext: + inside_dollar_string: bool = False + + def replace(self, 
**kwargs) -> "SerializationContext": return replace(self, **kwargs) - + + @contextmanager + def copy(self, **kwargs) -> Generator["SerializationContext", None, None]: + """Context manager that yields a modified copy of the context""" + modified_context = self.replace(**kwargs) + yield modified_context + + @contextmanager + def modify(self, **kwargs): + original_values = {key: getattr(self, key) for key in kwargs} + + for key, value in kwargs.items(): + setattr(self, key, value) + + try: + yield + finally: + # Restore original values + for key, value in original_values.items(): + setattr(self, key, value) + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): From f0f6fc995624fc19878cfa86743aa899c7344b6c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Tue, 12 Aug 2025 14:48:52 +0200 Subject: [PATCH 07/42] add JSON -> LarkElement deserializer; batch of other changes --- hcl2/const.py | 1 + hcl2/rule_transformer/deserializer.py | 264 ++++++++++++++++++- hcl2/rule_transformer/rules/abstract.py | 3 +- hcl2/rule_transformer/rules/base.py | 23 +- hcl2/rule_transformer/rules/functions.py | 2 +- hcl2/rule_transformer/rules/literal_rules.py | 6 - hcl2/rule_transformer/rules/strings.py | 57 ++-- hcl2/rule_transformer/rules/tokens.py | 72 +++-- hcl2/rule_transformer/transformer.py | 7 +- hcl2/rule_transformer/utils.py | 1 + 10 files changed, 365 insertions(+), 71 deletions(-) diff --git a/hcl2/const.py b/hcl2/const.py index 1d46f35a..1bd4a4ce 100644 --- a/hcl2/const.py +++ b/hcl2/const.py @@ -2,3 +2,4 @@ START_LINE_KEY = "__start_line__" END_LINE_KEY = "__end_line__" +IS_BLOCK = "__is_block__" diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index 5bdcf775..7b834968 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,7 +1,54 @@ import json +from functools import lru_cache from typing import Any, TextIO, List +from regex import regex + +from hcl2 import parses +from 
hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule +from hcl2.rule_transformer.rules.base import ( + BlockRule, + AttributeRule, + BodyRule, + StartRule, +) +from hcl2.rule_transformer.rules.containers import ( + TupleRule, + ObjectRule, + ObjectElemRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, + ObjectElemKeyRule, +) +from hcl2.rule_transformer.rules.expressions import ExprTermRule +from hcl2.rule_transformer.rules.literal_rules import ( + IdentifierRule, + IntLitRule, + FloatLitRule, +) +from hcl2.rule_transformer.rules.strings import ( + StringRule, + InterpolationRule, + StringPartRule, +) +from hcl2.rule_transformer.rules.tokens import ( + NAME, + EQ, + DBLQUOTE, + STRING_CHARS, + ESCAPED_INTERPOLATION, + INTERP_START, + RBRACE, + IntLiteral, + FloatLiteral, + RSQB, + LSQB, + COMMA, + DOT, + LBRACE, +) +from hcl2.rule_transformer.transformer import RuleTransformer from hcl2.rule_transformer.utils import DeserializationOptions @@ -9,8 +56,13 @@ class Deserializer: def __init__(self, options=DeserializationOptions()): self.options = options + @property + @lru_cache + def _transformer(self) -> RuleTransformer: + return RuleTransformer() + def load_python(self, value: Any) -> LarkElement: - pass + return StartRule([self._deserialize(value)]) def loads(self, value: str) -> LarkElement: return self.load_python(json.loads(value)) @@ -19,13 +71,209 @@ def load(self, file: TextIO) -> LarkElement: return self.loads(file.read()) def _deserialize(self, value: Any) -> LarkElement: - pass + if isinstance(value, dict): + if self._contains_block_marker(value): + elements = self._deserialize_block_elements(value) + return BodyRule(elements) + + return self._deserialize_object(value) + + if isinstance(value, list): + return self._deserialize_list(value) + + return self._deserialize_text(value) + + def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: + children = [] + + for key, value in 
value.items(): + if self._is_block(value): + # this value is a list of blocks, iterate over each block and deserialize them + for block in value: + children.append(self._deserialize_block(key, block)) + else: + + # otherwise it's just an attribute + if key != IS_BLOCK: + children.append(self._deserialize_attribute(key, value)) + + return children + + def _deserialize_text(self, value) -> LarkRule: + try: + int_val = int(value) + return IntLitRule([IntLiteral(int_val)]) + except ValueError: + pass + + try: + float_val = float(value) + return FloatLitRule([FloatLiteral(float_val)]) + except ValueError: + pass + + if isinstance(value, str): + if value.startswith('"') and value.endswith('"'): + return self._deserialize_string(value) + + if self._is_expression(value): + return self._deserialize_expression(value) + + return self._deserialize_identifier(value) + + elif isinstance(value, bool): + return self._deserialize_identifier(str(value).lower()) + + return self._deserialize_identifier(str(value)) + + def _deserialize_identifier(self, value: str) -> IdentifierRule: + return IdentifierRule([NAME(value)]) + + def _deserialize_string(self, value: str) -> StringRule: + result = [] + + pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") + parts = [part for part in pattern.split(value) if part != ""] + # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] + # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] + + for part in parts: + if part == '"': + continue + + if part.startswith('"'): + part = part[1:] + if part.endswith('"'): + part = part[:-1] + + e = self._deserialize_string_part(part) + result.append(e) + + return StringRule([DBLQUOTE(), *result, DBLQUOTE()]) + + def _deserialize_string_part(self, value: str) -> StringPartRule: + if value.startswith("$${") and value.endswith("}"): + return StringPartRule([ESCAPED_INTERPOLATION(value)]) + + if value.startswith("${") and value.endswith("}"): + return StringPartRule( + [ + InterpolationRule( + [INTERP_START(), self._deserialize_expression(value), RBRACE()] + ) + ] + ) + + return StringPartRule([STRING_CHARS(value)]) + + def _deserialize_expression(self, value: str) -> ExprTermRule: + """Deserialize an expression string into an ExprTermRule.""" + # instead of processing expression manually and trying to recognize what kind of expression it is, + # turn it into HCL2 code and parse it with lark: + + # unwrap from ${ and } + value = value[2:-1] + # create HCL2 snippet + value = f"temp = {value}" + # parse the above + parsed_tree = parses(value) + # transform parsed tree into LarkElement tree + rules_tree = self._transformer.transform(parsed_tree) + # extract expression from the tree + return rules_tree.body.children[0].expression + + def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: + """Deserialize a block by extracting labels and body""" + labels = [first_label] + body = value + + # Keep peeling off single-key layers until we hit the body (dict with IS_BLOCK) + while isinstance(body, dict) and not body.get(IS_BLOCK): + non_block_keys = [k for k in body.keys() if k != IS_BLOCK] + if len(non_block_keys) == 1: + # This is another label level + label = non_block_keys[0] + labels.append(label) + body = body[label] + else: + # 
Multiple keys = this is the body + break + + return BlockRule( + [*[self._deserialize(label) for label in labels], self._deserialize(body)] + ) + + def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule: + children = [ + self._deserialize_identifier(name), + EQ(), + ExprTermRule([self._deserialize(value)]), + ] + return AttributeRule(children) + + def _deserialize_list(self, value: List) -> TupleRule: + children = [] + for element in value: + deserialized = self._deserialize(element) + if not isinstance(deserialized, ExprTermRule): + # whatever an element of the list is, it has to be nested inside ExprTermRule + deserialized = ExprTermRule([deserialized]) + children.append(deserialized) + children.append(COMMA()) + + return TupleRule([LSQB(), *children, RSQB()]) + + def _deserialize_object(self, value: dict) -> ObjectRule: + children = [] + for key, value in value.items(): + children.append(self._deserialize_object_elem(key, value)) + return ObjectRule([LBRACE(), *children, RBRACE()]) + + def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: + if self._is_expression(key): + key = ObjectElemKeyExpressionRule([self._deserialize_expression(key)]) + elif "." 
in key: + parts = key.split(".") + children = [] + for part in parts: + children.append(self._deserialize_identifier(part)) + children.append(DOT()) + key = ObjectElemKeyDotAccessor(children[:-1]) # without the last comma + else: + key = self._deserialize_text(key) + + return ObjectElemRule( + [ + ObjectElemKeyRule([key]), + EQ(), + ExprTermRule([self._deserialize_text(value)]), + ] + ) + + def _is_expression(self, value: str) -> bool: + return value.startswith("${") and value.endswith("}") + + def _is_block(self, value: Any) -> bool: + """Simple check: if it's a list containing dicts with IS_BLOCK markers""" + if not isinstance(value, list) or len(value) == 0: + return False - def _deserialize_dict(self, value: dict) -> LarkRule: - pass + # Check if any item in the list has IS_BLOCK marker (directly or nested) + for item in value: + if isinstance(item, dict) and self._contains_block_marker(item): + return True - def _deserialize_list(self, value: List) -> LarkRule: - pass + return False - def _deserialize_expression(self, value: str) -> LarkRule: - pass + def _contains_block_marker(self, obj: dict) -> bool: + """Recursively check if a dict contains IS_BLOCK marker anywhere""" + if obj.get(IS_BLOCK): + return True + for value in obj.values(): + if isinstance(value, dict) and self._contains_block_marker(value): + return True + if isinstance(value, list): + for element in value: + if self._contains_block_marker(element): + return True + return False diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index e32d9ddb..33dcc9ca 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -36,7 +36,7 @@ def serialize( class LarkToken(LarkElement, ABC): - def __init__(self, value: Union[str, int]): + def __init__(self, value: Union[str, int, float]): self._value = value super().__init__() @@ -100,7 +100,6 @@ def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): for 
index, child in enumerate(children): if child is not None: - print(child) child.set_index(index) child.set_parent(self) diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index da74954b..5c8468d4 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,9 +3,11 @@ from lark.tree import Meta -from hcl2.dict_transformer import START_LINE, END_LINE +from hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.strings import StringRule from hcl2.rule_transformer.rules.tokens import NAME, EQ from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -42,7 +44,7 @@ class BodyRule(LarkRule): _children: List[ Union[ NewLineOrCommentRule, - # AttributeRule, + AttributeRule, "BlockRule", ] ] @@ -58,6 +60,7 @@ def serialize( attributes: List[AttributeRule] = [] comments = [] inline_comments = [] + for child in self._children: if isinstance(child, BlockRule): @@ -116,7 +119,11 @@ def serialize( class BlockRule(LarkRule): - _children: Tuple[BodyRule] + _children: Tuple[ + IdentifierRule, + Optional[Union[IdentifierRule, StringRule]], + BodyRule, + ] def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) @@ -141,15 +148,11 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: result = self._body.serialize(options) + if options.explicit_blocks: + result.update({IS_BLOCK: True}) + labels = self._labels for label in reversed(labels[1:]): result = {label.serialize(options): result} - result.update( - { - START_LINE: self._meta.line, - END_LINE: self._meta.end_line, - } - ) - return result diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 412a1667..54958514 
100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -78,7 +78,7 @@ def arguments(self) -> Optional[ArgumentsRule]: def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: result = ( - f"{":".join(identifier.serialize(options, context) for identifier in self.identifiers)}" + f"{"::".join(identifier.serialize(options, context) for identifier in self.identifiers)}" f"({self.arguments.serialize(options, context) if self.arguments else ""})" ) if not context.inside_dollar_string: diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py index db7e8289..baf8546f 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rule_transformer/rules/literal_rules.py @@ -43,12 +43,6 @@ def lark_name() -> str: return "float_lit" -class StringPartRule(TokenRule): - @staticmethod - def lark_name() -> str: - return "string_part" - - class BinaryOperatorRule(TokenRule): @staticmethod def lark_name() -> str: diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index dc3b85b0..769ad5b9 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -1,15 +1,13 @@ -from typing import Tuple, Optional, List, Any, Union - -from lark.tree import Meta +from typing import Tuple, List, Any, Union from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule -from hcl2.rule_transformer.rules.literal_rules import StringPartRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.tokens import ( INTERP_START, RBRACE, DBLQUOTE, STRING_CHARS, + ESCAPED_INTERPOLATION, ) from hcl2.rule_transformer.utils import ( SerializationOptions, @@ -18,41 +16,58 @@ ) -class StringRule(LarkRule): +class InterpolationRule(LarkRule): - _children: Tuple[DBLQUOTE, 
List[StringPartRule], DBLQUOTE] + _children: Tuple[ + INTERP_START, + ExpressionRule, + RBRACE, + ] @staticmethod def lark_name() -> str: - return "string" + return "interpolation" @property - def string_parts(self): - return self.children[1:-1] + def expression(self): + return self.children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + return to_dollar_string(self.expression.serialize(options)) -class InterpolationRule(LarkRule): +class StringPartRule(LarkRule): + _children: Tuple[Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]] - _children: Tuple[ - INTERP_START, - ExpressionRule, - RBRACE, - ] + @staticmethod + def lark_name() -> str: + return "string_part" + + @property + def content(self) -> Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]: + return self._children[0] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return self.content.serialize(options, context) + + +class StringRule(LarkRule): + + _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] @staticmethod def lark_name() -> str: - return "interpolation" + return "string" @property - def expression(self): - return self.children[1] + def string_parts(self): + return self.children[1:-1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return to_dollar_string(self.expression.serialize(options)) + return '"' + "".join(part.serialize() for part in self.string_parts) + '"' diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 7dd79f63..59e524f3 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -1,5 +1,5 @@ from functools import lru_cache -from typing import Callable, Any, Type +from typing import Callable, Any, Type, Optional, Tuple from 
hcl2.rule_transformer.rules.abstract import LarkToken @@ -10,9 +10,9 @@ class StringToken(LarkToken): cached subclass whose static `lark_name()` yields the given string. """ - @staticmethod + @classmethod @lru_cache(maxsize=None) - def __build_subclass(name: str) -> Type["StringToken"]: + def __build_subclass(cls, name: str) -> Type["StringToken"]: """Create a subclass with a constant `lark_name`.""" return type( # type: ignore f"{name}_TOKEN", @@ -28,7 +28,7 @@ def __class_getitem__(cls, name: str) -> Type["StringToken"]: raise TypeError("StringToken[...] expects a single str argument") return cls.__build_subclass(name) - def __init__(self, value: Any) -> None: + def __init__(self, value: Optional[Any] = None): super().__init__(value) @property @@ -36,27 +36,59 @@ def serialize_conversion(self) -> Callable[[Any], str]: return str +class StaticStringToken(LarkToken): + @classmethod + @lru_cache(maxsize=None) + def __build_subclass( + cls, name: str, default_value: str = None + ) -> Type["StringToken"]: + """Create a subclass with a constant `lark_name`.""" + + return type( # type: ignore + f"{name}_TOKEN", + (cls,), + { + "__slots__": (), + "lark_name": staticmethod(lambda _n=name: _n), + "_default_value": default_value, + }, + ) + + def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StringToken"]: + name, default_value = value + return cls.__build_subclass(name, default_value) + + def __init__(self): + super().__init__(getattr(self, "_default_value")) + + @property + def serialize_conversion(self) -> Callable[[Any], str]: + return str + + # explicitly define various kinds of string-based tokens for type hinting +# variable value NAME = StringToken["NAME"] STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] -EQ = StringToken["EQ"] -COLON = StringToken["COLON"] -LPAR = StringToken["LPAR"] -RPAR = StringToken["RPAR"] -LBRACE = StringToken["LBRACE"] -RBRACE = 
StringToken["RBRACE"] -DOT = StringToken["DOT"] -COMMA = StringToken["COMMA"] -ELLIPSIS = StringToken["ELLIPSIS"] -QMARK = StringToken["QMARK"] -LSQB = StringToken["LSQB"] -RSQB = StringToken["RSQB"] -INTERP_START = StringToken["INTERP_START"] -DBLQUOTE = StringToken["DBLQUOTE"] -ATTR_SPLAT = StringToken["ATTR_SPLAT"] -FULL_SPLAT = StringToken["FULL_SPLAT"] +# static value +EQ = StaticStringToken[("EQ", "=")] +COLON = StaticStringToken[("COLON", ":")] +LPAR = StaticStringToken[("LPAR", "(")] +RPAR = StaticStringToken[("RPAR", ")")] +LBRACE = StaticStringToken[("LBRACE", "{")] +RBRACE = StaticStringToken[("RBRACE", "}")] +DOT = StaticStringToken[("DOT", ".")] +COMMA = StaticStringToken[("COMMA", ",")] +ELLIPSIS = StaticStringToken[("ELLIPSIS", "...")] +QMARK = StaticStringToken[("QMARK", "?")] +LSQB = StaticStringToken[("LSQB", "[")] +RSQB = StaticStringToken[("RSQB", "]")] +INTERP_START = StaticStringToken[("INTERP_START", "${")] +DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] +ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] +FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] class IntLiteral(LarkToken): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 41e970d6..a7d91605 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -1,6 +1,4 @@ # pylint: disable=missing-function-docstring,unused-argument -from typing import List, Union - from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta @@ -42,9 +40,12 @@ IntLitRule, IdentifierRule, BinaryOperatorRule, +) +from hcl2.rule_transformer.rules.strings import ( + InterpolationRule, + StringRule, StringPartRule, ) -from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule from hcl2.rule_transformer.rules.tokens import ( NAME, IntLiteral, diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 8ffeab8b..404bdcdd 100644 --- 
a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -9,6 +9,7 @@ class SerializationOptions: with_meta: bool = False wrap_objects: bool = False wrap_tuples: bool = False + explicit_blocks: bool = True @dataclass From d8ac92d8f41de654218280aeb26f2cf4a45879f7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 27 Aug 2025 11:35:56 +0200 Subject: [PATCH 08/42] add heredoc rules and deserialization; require heredoc openers to be on their on separate line in lark grammar; whitespace trimming based on current implementation in dict_transformer.py; --- hcl2/rule_transformer/deserializer.py | 32 ++++++++--- hcl2/rule_transformer/hcl2.lark | 4 +- hcl2/rule_transformer/rules/strings.py | 73 ++++++++++++++++++++++++++ hcl2/rule_transformer/rules/tokens.py | 6 ++- hcl2/rule_transformer/transformer.py | 12 ++++- hcl2/rule_transformer/utils.py | 8 ++- 6 files changed, 123 insertions(+), 12 deletions(-) diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index 7b834968..a17a9510 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,6 +1,6 @@ import json from functools import lru_cache -from typing import Any, TextIO, List +from typing import Any, TextIO, List, Union from regex import regex @@ -31,6 +31,8 @@ StringRule, InterpolationRule, StringPartRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( NAME, @@ -47,9 +49,11 @@ COMMA, DOT, LBRACE, + HEREDOC_TRIM_TEMPLATE, + HEREDOC_TEMPLATE, ) from hcl2.rule_transformer.transformer import RuleTransformer -from hcl2.rule_transformer.utils import DeserializationOptions +from hcl2.rule_transformer.utils import DeserializationOptions, HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN class Deserializer: @@ -99,7 +103,7 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: return children - def _deserialize_text(self, value) -> LarkRule: + def _deserialize_text(self, 
value: Any) -> LarkRule: try: int_val = int(value) return IntLitRule([IntLiteral(int_val)]) @@ -114,6 +118,16 @@ def _deserialize_text(self, value) -> LarkRule: if isinstance(value, str): if value.startswith('"') and value.endswith('"'): + if not self.options.heredocs_to_strings and value.startswith('"<<-'): + match = HEREDOC_TRIM_PATTERN.match(value[1:-1]) + if match: + return self._deserialize_heredoc(value[1:-1], True) + + if not self.options.heredocs_to_strings and value.startswith('"<<'): + match = HEREDOC_PATTERN.match(value[1:-1]) + if match: + return self._deserialize_heredoc(value[1:-1], False) + return self._deserialize_string(value) if self._is_expression(value): @@ -131,11 +145,12 @@ def _deserialize_identifier(self, value: str) -> IdentifierRule: def _deserialize_string(self, value: str) -> StringRule: result = [] - - pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") - parts = [part for part in pattern.split(value) if part != ""] + # split string into individual parts based on lark grammar # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] + pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") + parts = [part for part in pattern.split(value) if part != ""] + for part in parts: if part == '"': @@ -166,6 +181,11 @@ def _deserialize_string_part(self, value: str) -> StringPartRule: return StringPartRule([STRING_CHARS(value)]) + def _deserialize_heredoc(self, value: str, trim: bool) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: + if trim: + return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)]) + return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) + def _deserialize_expression(self, value: str) -> ExprTermRule: """Deserialize an expression string into an ExprTermRule.""" # instead of processing expression manually and trying to recognize what kind of expression it is, diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index 3f8d913e..24140ada 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -67,8 +67,8 @@ ELLIPSIS : "..." 
COLONS: "::" // Heredocs -HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ -HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ +HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ // Ignore whitespace (but not newlines, as they're significant in HCL) %ignore /[ \t]+/ diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index 769ad5b9..4e28e976 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -1,3 +1,4 @@ +import sys from typing import Tuple, List, Any, Union from hcl2.rule_transformer.rules.abstract import LarkRule @@ -8,11 +9,15 @@ DBLQUOTE, STRING_CHARS, ESCAPED_INTERPOLATION, + HEREDOC_TEMPLATE, + HEREDOC_TRIM_TEMPLATE, ) from hcl2.rule_transformer.utils import ( SerializationOptions, SerializationContext, to_dollar_string, + HEREDOC_TRIM_PATTERN, + HEREDOC_PATTERN, ) @@ -71,3 +76,71 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + + +class HeredocTemplateRule(LarkRule): + + _children: Tuple[HEREDOC_TEMPLATE] + _trim_chars = "\n\t " + + + @staticmethod + def lark_name() -> str: + return "heredoc_template" + + @property + def heredoc(self): + return self.children[0] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + heredoc = self.heredoc.serialize(options, context) + + if not options.preserve_heredocs: + match = HEREDOC_PATTERN.match(heredoc) + if not match: + raise RuntimeError(f"Invalid Heredoc token: {heredoc}") + heredoc = match.group(2) + + result = heredoc.rstrip(self._trim_chars) + return f'"{result}"' + + +class HeredocTrimTemplateRule(HeredocTemplateRule): + + _children: 
Tuple[HEREDOC_TRIM_TEMPLATE] + + @staticmethod + def lark_name() -> str: + return "heredoc_trim_template" + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions + # This is a special version of heredocs that are declared with "<<-" + # This will calculate the minimum number of leading spaces in each line of a heredoc + # and then remove that number of spaces from each line + + heredoc = self.heredoc.serialize(options, context) + + if not options.preserve_heredocs: + match = HEREDOC_TRIM_PATTERN.match(heredoc) + if not match: + raise RuntimeError(f"Invalid Heredoc token: {heredoc}") + heredoc = match.group(2) + + heredoc = heredoc.rstrip(self._trim_chars) + lines = heredoc.split("\n") + + # calculate the min number of leading spaces in each line + min_spaces = sys.maxsize + for line in lines: + leading_spaces = len(line) - len(line.lstrip(" ")) + min_spaces = min(min_spaces, leading_spaces) + + # trim off that number of leading spaces from each line + lines = [line[min_spaces:] for line in lines] + return '"' + "\n".join(lines) + '"' + \ No newline at end of file diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 59e524f3..5b1959f3 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -67,12 +67,14 @@ def serialize_conversion(self) -> Callable[[Any], str]: # explicitly define various kinds of string-based tokens for type hinting -# variable value +# variable values NAME = StringToken["NAME"] STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] -# static value +HEREDOC_TEMPLATE = STRING_CHARS["HEREDOC_TEMPLATE"] +HEREDOC_TRIM_TEMPLATE = STRING_CHARS["HEREDOC_TRIM_TEMPLATE"] +# static values EQ = StaticStringToken[("EQ", "=")] COLON = 
StaticStringToken[("COLON", ":")] LPAR = StaticStringToken[("LPAR", "(")] diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index a7d91605..37ae445c 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -44,7 +44,9 @@ from hcl2.rule_transformer.rules.strings import ( InterpolationRule, StringRule, - StringPartRule, + StringPartRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( NAME, @@ -127,6 +129,14 @@ def string_part(self, meta: Meta, args) -> StringPartRule: def interpolation(self, meta: Meta, args) -> InterpolationRule: return InterpolationRule(args, meta) + @v_args(meta=True) + def heredoc_template(self, meta: Meta, args) -> HeredocTemplateRule: + return HeredocTemplateRule(args, meta) + + @v_args(meta=True) + def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: + return HeredocTrimTemplateRule(args, meta) + @v_args(meta=True) def expr_term(self, meta: Meta, args) -> ExprTermRule: return ExprTermRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 404bdcdd..98370ca3 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,7 +1,12 @@ +import re from contextlib import contextmanager from dataclasses import dataclass, replace from typing import Generator +HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) +HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) + + @dataclass class SerializationOptions: @@ -10,11 +15,12 @@ class SerializationOptions: wrap_objects: bool = False wrap_tuples: bool = False explicit_blocks: bool = True + preserve_heredocs: bool = True @dataclass class DeserializationOptions: - pass + heredocs_to_strings: bool = False @dataclass From 5932662bfe5045c2e944f7c9e3fc55c94077c4c9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 15 Sep 2025 
12:26:59 +0200 Subject: [PATCH 09/42] add `for` expressions rules --- .../rule_transformer/rules/for_expressions.py | 283 ++++++++++++++++++ hcl2/rule_transformer/rules/functions.py | 2 +- hcl2/rule_transformer/rules/tokens.py | 4 + hcl2/rule_transformer/transformer.py | 36 ++- 4 files changed, 320 insertions(+), 5 deletions(-) create mode 100644 hcl2/rule_transformer/rules/for_expressions.py diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rule_transformer/rules/for_expressions.py new file mode 100644 index 00000000..18abe6c8 --- /dev/null +++ b/hcl2/rule_transformer/rules/for_expressions.py @@ -0,0 +1,283 @@ +from typing import Any, Tuple, Optional, List + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.tokens import ( + LSQB, + RSQB, + LBRACE, + RBRACE, + FOR, + IN, + IF, + COMMA, + COLON, + ELLIPSIS, + FOR_OBJECT_ARROW, +) +from hcl2.rule_transformer.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) + + +class ForIntroRule(InlineCommentMixIn): + """Rule for the intro part of for expressions: 'for key, value in collection :'""" + + _children: Tuple[ + FOR, + Optional[NewLineOrCommentRule], + IdentifierRule, + Optional[COMMA], + Optional[IdentifierRule], + Optional[NewLineOrCommentRule], + IN, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + COLON, + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "for_intro" + + def __init__(self, children, meta: Optional[Meta] = None): + # Insert null comments at positions where they might be missing + self._possibly_insert_null_second_identifier(children) + 
self._possibly_insert_null_comments(children, [1, 5, 7, 9, 11]) + super().__init__(children, meta) + + def _possibly_insert_null_second_identifier(self, children: List[LarkRule]): + second_identifier_present = ( + len([child for child in children if isinstance(child, IdentifierRule)]) == 2 + ) + if not second_identifier_present: + children.insert(3, None) + children.insert(4, None) + + @property + def first_iterator(self) -> IdentifierRule: + """Returns the first iterator""" + return self._children[2] + + @property + def second_iterator(self) -> Optional[IdentifierRule]: + """Returns the second iterator or None if not present""" + return self._children[4] + + @property + def iterable(self) -> ExpressionRule: + """Returns the collection expression being iterated over""" + return self._children[8] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> str: + result = "for " + + result += f"{self.first_iterator.serialize(options, context)}" + if self.second_iterator: + result += f", {self.second_iterator.serialize(options, context)}" + + result += f" in {self.iterable.serialize(options, context)} : " + + return result + + +class ForCondRule(InlineCommentMixIn): + """Rule for the optional condition in for expressions: 'if condition'""" + + _children: Tuple[ + IF, + Optional[NewLineOrCommentRule], + ExpressionRule, # condition expression + ] + + @staticmethod + def lark_name() -> str: + return "for_cond" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1]) + super().__init__(children, meta) + + @property + def condition_expr(self) -> ExpressionRule: + """Returns the condition expression""" + return self._children[2] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> str: + return f"if {self.condition_expr.serialize(options, context)}" + + +class ForTupleExprRule(ExpressionRule): + """Rule for tuple/array for expressions: 
[for item in items : expression]""" + + _children: Tuple[ + LSQB, + Optional[NewLineOrCommentRule], + ForIntroRule, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[ForCondRule], + Optional[NewLineOrCommentRule], + RSQB, + ] + + @staticmethod + def lark_name() -> str: + return "for_tuple_expr" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 5, 7]) + self._possibly_insert_null_condition(children) + super().__init__(children, meta) + + def _possibly_insert_null_condition(self, children: List[LarkElement]): + if not len([child for child in children if isinstance(child, ForCondRule)]): + children.insert(6, None) + + @property + def for_intro(self) -> ForIntroRule: + """Returns the for intro rule""" + return self._children[2] + + @property + def value_expr(self) -> ExpressionRule: + """Returns the value expression""" + return self._children[4] + + @property + def condition(self) -> Optional[ForCondRule]: + """Returns the optional condition rule""" + return self._children[6] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + result = "[" + + with context.modify(inside_dollar_string=True): + result += self.for_intro.serialize(options, context) + result += self.value_expr.serialize(options, context) + + if self.condition is not None: + result += f" {self.condition.serialize(options, context)}" + + result += "]" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class ForObjectExprRule(ExpressionRule): + """Rule for object for expressions: {for key, value in items : key => value}""" + + _children: Tuple[ + LBRACE, + Optional[NewLineOrCommentRule], + ForIntroRule, + Optional[NewLineOrCommentRule], + ExpressionRule, + FOR_OBJECT_ARROW, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[ELLIPSIS], + 
Optional[NewLineOrCommentRule], + Optional[ForCondRule], + Optional[NewLineOrCommentRule], + RBRACE, + ] + + @staticmethod + def lark_name() -> str: + return "for_object_expr" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 6, 8, 10, 12]) + self._possibly_insert_null_optionals(children) + super().__init__(children, meta) + + def _possibly_insert_null_optionals(self, children: List[LarkElement]): + has_ellipsis = False + has_condition = False + + for child in children: + # if not has_ellipsis and isinstance(child, ELLIPSIS): + if ( + has_ellipsis is False + and child is not None + and child.lark_name() == ELLIPSIS.lark_name() + ): + has_ellipsis = True + if not has_condition and isinstance(child, ForCondRule): + has_condition = True + + if not has_ellipsis: + children.insert(9, None) + + if not has_condition: + children.insert(11, None) + + @property + def for_intro(self) -> ForIntroRule: + """Returns the for intro rule""" + return self._children[2] + + @property + def key_expr(self) -> ExpressionRule: + """Returns the key expression""" + return self._children[4] + + @property + def value_expr(self) -> ExpressionRule: + """Returns the value expression""" + return self._children[7] + + @property + def ellipsis(self) -> Optional[ELLIPSIS]: + """Returns the optional ellipsis token""" + return self._children[9] + + @property + def condition(self) -> Optional[ForCondRule]: + """Returns the optional condition rule""" + return self._children[11] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = "{" + with context.modify(inside_dollar_string=True): + result += self.for_intro.serialize(options, context) + result += f"{self.key_expr.serialize(options, context)} => " + + result += self.value_expr.serialize( + SerializationOptions(wrap_objects=True), context + ) + + if self.ellipsis is not None: + result += self.ellipsis.serialize(options, 
context) + + if self.condition is not None: + result += f" {self.condition.serialize(options, context)}" + + result += "}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 54958514..b25fed62 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -40,7 +40,7 @@ def arguments(self) -> List[ExpressionRule]: return [child for child in self._children if isinstance(child, ExpressionRule)] def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ", ".join([argument.serialize(options, context) for argument in self.arguments]) + result = ", ".join([str(argument.serialize(options, context)) for argument in self.arguments]) if self.has_ellipsis: result += " ..." return result diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 5b1959f3..67d53fcf 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -91,6 +91,10 @@ def serialize_conversion(self) -> Callable[[Any], str]: DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] +FOR = StaticStringToken[("FOR", "for")] +IN = StaticStringToken[("IN", "in")] +IF = StaticStringToken[("IF", "if")] +FOR_OBJECT_ARROW = StaticStringToken[("FOR_OBJECT_ARROW", "=>")] class IntLiteral(LarkToken): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 37ae445c..1ab1dfda 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -23,6 +23,12 @@ ExprTermRule, ConditionalRule, ) +from hcl2.rule_transformer.rules.for_expressions import ( + ForTupleExprRule, + ForObjectExprRule, + ForIntroRule, + ForCondRule, +) from 
hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule from hcl2.rule_transformer.rules.indexing import ( IndexExprTermRule, @@ -40,12 +46,13 @@ IntLitRule, IdentifierRule, BinaryOperatorRule, + KeywordRule, ) from hcl2.rule_transformer.rules.strings import ( InterpolationRule, StringRule, - StringPartRule, - HeredocTemplateRule, + StringPartRule, + HeredocTemplateRule, HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( @@ -72,6 +79,7 @@ def __init__(self, discard_new_line_or_comments: bool = False): self.discard_new_line_or_comments = discard_new_line_or_comments def __default_token__(self, token: Token) -> StringToken: + # TODO make this return StaticStringToken where applicable return StringToken[token.type](token.value) def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: @@ -109,6 +117,10 @@ def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: def identifier(self, meta: Meta, args) -> IdentifierRule: return IdentifierRule(args, meta) + @v_args(meta=True) + def keyword(self, meta: Meta, args) -> KeywordRule: + return KeywordRule(args, meta) + @v_args(meta=True) def int_lit(self, meta: Meta, args) -> IntLitRule: return IntLitRule(args, meta) @@ -132,11 +144,11 @@ def interpolation(self, meta: Meta, args) -> InterpolationRule: @v_args(meta=True) def heredoc_template(self, meta: Meta, args) -> HeredocTemplateRule: return HeredocTemplateRule(args, meta) - + @v_args(meta=True) def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: return HeredocTrimTemplateRule(args, meta) - + @v_args(meta=True) def expr_term(self, meta: Meta, args) -> ExprTermRule: return ExprTermRule(args, meta) @@ -236,3 +248,19 @@ def full_splat(self, meta: Meta, args) -> FullSplatRule: @v_args(meta=True) def full_splat_expr_term(self, meta: Meta, args) -> FullSplatExprTermRule: return FullSplatExprTermRule(args, meta) + + @v_args(meta=True) + def for_tuple_expr(self, meta: Meta, args) -> 
ForTupleExprRule: + return ForTupleExprRule(args, meta) + + @v_args(meta=True) + def for_object_expr(self, meta: Meta, args) -> ForObjectExprRule: + return ForObjectExprRule(args, meta) + + @v_args(meta=True) + def for_intro(self, meta: Meta, args) -> ForIntroRule: + return ForIntroRule(args, meta) + + @v_args(meta=True) + def for_cond(self, meta: Meta, args) -> ForCondRule: + return ForCondRule(args, meta) From 107fcb223f176793e04aa750f2c120cb38d00afa Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 29 Sep 2025 13:10:35 +0200 Subject: [PATCH 10/42] add Lark AST -> HCL2 reconstructor and LarkTree formatter; various other fixes and changes: * preserve order of serialized attributes and blocks * make RuleTransformer.__default_token__ differentiate between StringToken and StaticStringToken * add separate ProviderFunctionCallRule class for more accurate reconstruction --- hcl2/rule_transformer/deserializer.py | 120 +++++--- hcl2/rule_transformer/formatter.py | 262 ++++++++++++++++++ hcl2/rule_transformer/reconstructor.py | 204 ++++++++++++++ hcl2/rule_transformer/rules/abstract.py | 5 +- hcl2/rule_transformer/rules/base.py | 48 ++-- hcl2/rule_transformer/rules/containers.py | 69 +++-- hcl2/rule_transformer/rules/expressions.py | 9 +- .../rule_transformer/rules/for_expressions.py | 92 +++--- hcl2/rule_transformer/rules/functions.py | 63 +++-- hcl2/rule_transformer/rules/indexing.py | 2 +- hcl2/rule_transformer/rules/tokens.py | 12 +- hcl2/rule_transformer/rules/whitespace.py | 9 +- hcl2/rule_transformer/transformer.py | 3 + hcl2/rule_transformer/utils.py | 6 - 14 files changed, 738 insertions(+), 166 deletions(-) create mode 100644 hcl2/rule_transformer/formatter.py create mode 100644 hcl2/rule_transformer/reconstructor.py diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index a17a9510..56e1ad44 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,6 +1,8 @@ import json +from 
abc import ABC, abstractmethod +from dataclasses import dataclass from functools import lru_cache -from typing import Any, TextIO, List, Union +from typing import Any, TextIO, List, Union, Optional from regex import regex @@ -31,7 +33,7 @@ StringRule, InterpolationRule, StringPartRule, - HeredocTemplateRule, + HeredocTemplateRule, HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( @@ -51,14 +53,38 @@ LBRACE, HEREDOC_TRIM_TEMPLATE, HEREDOC_TEMPLATE, + COLON, ) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.transformer import RuleTransformer -from hcl2.rule_transformer.utils import DeserializationOptions, HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN +from hcl2.rule_transformer.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN -class Deserializer: - def __init__(self, options=DeserializationOptions()): - self.options = options +@dataclass +class DeserializerOptions: + heredocs_to_strings: bool = False + indent_length: int = 2 + object_elements_colon: bool = False + object_elements_trailing_comma: bool = True + + +class LarkElementTreeDeserializer(ABC): + def __init__(self, options: DeserializerOptions = None): + self.options = options or DeserializerOptions() + + @abstractmethod + def loads(self, value: str) -> LarkElement: + raise NotImplementedError() + + def load(self, file: TextIO) -> LarkElement: + return self.loads(file.read()) + + +class BaseDeserializer(LarkElementTreeDeserializer): + def __init__(self, options=None): + super().__init__(options) + self._current_line = 1 + self._last_new_line: Optional[NewLineOrCommentRule] = None @property @lru_cache @@ -66,19 +92,23 @@ def _transformer(self) -> RuleTransformer: return RuleTransformer() def load_python(self, value: Any) -> LarkElement: - return StartRule([self._deserialize(value)]) + result = StartRule([self._deserialize(value)]) + return result def loads(self, value: str) -> LarkElement: return self.load_python(json.loads(value)) - def 
load(self, file: TextIO) -> LarkElement: - return self.loads(file.read()) - def _deserialize(self, value: Any) -> LarkElement: if isinstance(value, dict): if self._contains_block_marker(value): - elements = self._deserialize_block_elements(value) - return BodyRule(elements) + + children = [] + + block_elements = self._deserialize_block_elements(value) + for element in block_elements: + children.append(element) + + return BodyRule(children) return self._deserialize_object(value) @@ -89,14 +119,13 @@ def _deserialize(self, value: Any) -> LarkElement: def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: children = [] - for key, value in value.items(): if self._is_block(value): # this value is a list of blocks, iterate over each block and deserialize them for block in value: children.append(self._deserialize_block(key, block)) - else: + else: # otherwise it's just an attribute if key != IS_BLOCK: children.append(self._deserialize_attribute(key, value)) @@ -106,28 +135,24 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: def _deserialize_text(self, value: Any) -> LarkRule: try: int_val = int(value) + if "." 
in str(value): + return FloatLitRule([FloatLiteral(float(value))]) return IntLitRule([IntLiteral(int_val)]) except ValueError: pass - try: - float_val = float(value) - return FloatLitRule([FloatLiteral(float_val)]) - except ValueError: - pass - if isinstance(value, str): if value.startswith('"') and value.endswith('"'): if not self.options.heredocs_to_strings and value.startswith('"<<-'): match = HEREDOC_TRIM_PATTERN.match(value[1:-1]) if match: return self._deserialize_heredoc(value[1:-1], True) - + if not self.options.heredocs_to_strings and value.startswith('"<<'): match = HEREDOC_PATTERN.match(value[1:-1]) if match: return self._deserialize_heredoc(value[1:-1], False) - + return self._deserialize_string(value) if self._is_expression(value): @@ -151,7 +176,6 @@ def _deserialize_string(self, value: str) -> StringRule: pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") parts = [part for part in pattern.split(value) if part != ""] - for part in parts: if part == '"': continue @@ -181,10 +205,12 @@ def _deserialize_string_part(self, value: str) -> StringPartRule: return StringPartRule([STRING_CHARS(value)]) - def _deserialize_heredoc(self, value: str, trim: bool) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: + def _deserialize_heredoc( + self, value: str, trim: bool + ) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: if trim: return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)]) - return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) + return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) def _deserialize_expression(self, value: str) -> ExprTermRule: """Deserialize an expression string into an ExprTermRule.""" @@ -200,7 +226,9 @@ def _deserialize_expression(self, value: str) -> ExprTermRule: # transform parsed tree into LarkElement tree rules_tree = self._transformer.transform(parsed_tree) # extract expression from the tree - return rules_tree.body.children[0].expression + result = rules_tree.body.children[0].expression + + return 
result def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: """Deserialize a block by extracting labels and body""" @@ -220,14 +248,24 @@ def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: break return BlockRule( - [*[self._deserialize(label) for label in labels], self._deserialize(body)] + [ + *[self._deserialize(label) for label in labels], + LBRACE(), + self._deserialize(body), + RBRACE(), + ] ) def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule: + expr_term = self._deserialize(value) + + if not isinstance(expr_term, ExprTermRule): + expr_term = ExprTermRule([expr_term]) + children = [ self._deserialize_identifier(name), EQ(), - ExprTermRule([self._deserialize(value)]), + expr_term, ] return AttributeRule(children) @@ -247,11 +285,21 @@ def _deserialize_object(self, value: dict) -> ObjectRule: children = [] for key, value in value.items(): children.append(self._deserialize_object_elem(key, value)) + + if self.options.object_elements_trailing_comma: + children.append(COMMA()) + return ObjectRule([LBRACE(), *children, RBRACE()]) def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: if self._is_expression(key): - key = ObjectElemKeyExpressionRule([self._deserialize_expression(key)]) + key = ObjectElemKeyExpressionRule( + [ + child + for child in self._deserialize_expression(key).children + if child is not None + ] + ) elif "." 
in key: parts = key.split(".") children = [] @@ -262,13 +310,13 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: else: key = self._deserialize_text(key) - return ObjectElemRule( - [ - ObjectElemKeyRule([key]), - EQ(), - ExprTermRule([self._deserialize_text(value)]), - ] - ) + result = [ + ObjectElemKeyRule([key]), + COLON() if self.options.object_elements_colon else EQ(), + ExprTermRule([self._deserialize(value)]), + ] + + return ObjectElemRule(result) def _is_expression(self, value: str) -> bool: return value.startswith("${") and value.endswith("}") diff --git a/hcl2/rule_transformer/formatter.py b/hcl2/rule_transformer/formatter.py new file mode 100644 index 00000000..ad0247dc --- /dev/null +++ b/hcl2/rule_transformer/formatter.py @@ -0,0 +1,262 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import List + +from hcl2.rule_transformer.rules.abstract import LarkElement +from hcl2.rule_transformer.rules.base import ( + StartRule, + BlockRule, + AttributeRule, + BodyRule, +) +from hcl2.rule_transformer.rules.containers import ObjectRule, ObjectElemRule, TupleRule +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rule_transformer.rules.for_expressions import ( + ForTupleExprRule, + ForObjectExprRule, +) +from hcl2.rule_transformer.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +@dataclass +class FormatterOptions: + indent_length: int = 2 + open_empty_blocks: bool = True + open_empty_objects: bool = True + open_empty_tuples: bool = False + + vertically_align_attributes: bool = True + vertically_align_object_elements: bool = True + + +class LarkElementTreeFormatter(ABC): + def __init__(self, options: FormatterOptions = None): + self.options = options or FormatterOptions() + + @abstractmethod + def format_tree(self, tree: LarkElement): + raise NotImplementedError() + + 
+class BaseFormatter(LarkElementTreeFormatter): + def __init__(self, options: FormatterOptions = None): + super().__init__(options) + self._current_line = 1 + self._current_indent_level = 0 + + def format_tree(self, tree: LarkElement): + if isinstance(tree, StartRule): + self.format_start_rule(tree) + + def format_start_rule(self, rule: StartRule): + self.format_body_rule(rule.body, 0) + # for child in rule.body.children: + # if isinstance(child, BlockRule): + # self.format_block_rule(child, 1) + + def format_block_rule(self, rule: BlockRule, indent_level: int = 0): + if self.options.vertically_align_attributes: + self._vertically_align_attributes_in_body(rule.body) + + self.format_body_rule(rule.body, indent_level) + if len(rule.body.children) > 0: + rule.children.insert(-1, self._build_newline(indent_level - 1)) + elif self.options.open_empty_blocks: + rule.children.insert(-1, self._build_newline(indent_level - 1, 2)) + + def format_body_rule(self, rule: BodyRule, indent_level: int = 0): + + in_start = isinstance(rule.parent, StartRule) + + new_children = [] + if not in_start: + new_children.append(self._build_newline(indent_level)) + + for i, child in enumerate(rule.children): + new_children.append(child) + + if isinstance(child, AttributeRule): + self.format_attribute_rule(child, indent_level) + new_children.append(self._build_newline(indent_level)) + + if isinstance(child, BlockRule): + self.format_block_rule(child, indent_level + 1) + + if i > 0: + new_children.insert(-2, self._build_newline(indent_level)) + new_children.append(self._build_newline(indent_level, 2)) + + new_children.pop(-1) + rule._children = new_children + + def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): + self.format_expression(rule.expression, indent_level + 1) + + def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): + if len(rule.elements) == 0: + if self.options.open_empty_tuples: + rule.children.insert(1, self._build_newline(indent_level - 1, 
2)) + return + + new_children = [] + for child in rule.children: + new_children.append(child) + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + if isinstance(child, (COMMA, LSQB)): + new_children.append(self._build_newline(indent_level)) + + self._deindent_last_line() + rule._children = new_children + + def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): + if len(rule.elements) == 0: + if self.options.open_empty_objects: + rule.children.insert(1, self._build_newline(indent_level - 1, 2)) + return + + new_children = [] + for i in range(len(rule.children)): + child = rule.children[i] + next_child = rule.children[i + 1] if i + 1 < len(rule.children) else None + new_children.append(child) + + if isinstance(child, LBRACE): + new_children.append(self._build_newline(indent_level)) + + if ( + next_child + and isinstance(next_child, ObjectElemRule) + and isinstance(child, (ObjectElemRule, COMMA)) + ): + new_children.append(self._build_newline(indent_level)) + + if isinstance(child, ObjectElemRule): + self.format_expression(child.expression, indent_level + 1) + + new_children.insert(-1, self._build_newline(indent_level)) + self._deindent_last_line() + + rule._children = new_children + + if self.options.vertically_align_object_elements: + self._vertically_align_object_elems(rule) + + def format_expression(self, rule: ExprTermRule, indent_level: int = 0): + if isinstance(rule.expression, ObjectRule): + self.format_object_rule(rule.expression, indent_level) + + elif isinstance(rule.expression, TupleRule): + self.format_tuple_rule(rule.expression, indent_level) + + elif isinstance(rule.expression, ForTupleExprRule): + self.format_fortupleexpr(rule.expression, indent_level) + + elif isinstance(rule.expression, ForObjectExprRule): + self.format_forobjectexpr(rule.expression, indent_level) + + elif isinstance(rule.expression, ExprTermRule): + self.format_expression(rule.expression) + + def format_fortupleexpr(self, 
expression: ForTupleExprRule, indent_level: int = 0): + for child in expression.children: + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + indexes = [1, 3, 5, 7] + for index in indexes: + expression.children[index] = self._build_newline(indent_level) + self._deindent_last_line() + # expression.children[8] = self._build_newline(indent_level - 1) + + def format_forobjectexpr( + self, expression: ForObjectExprRule, indent_level: int = 0 + ): + for child in expression.children: + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + indexes = [1, 3, 12] + for index in indexes: + expression.children[index] = self._build_newline(indent_level) + + self._deindent_last_line() + + def _vertically_align_attributes_in_body(self, body: BodyRule): + attributes_sequence: List[AttributeRule] = [] + + for child in body.children: + if isinstance(child, AttributeRule): + attributes_sequence.append(child) + + elif attributes_sequence: + max_length = max( + len(attribute.identifier.token.value) + for attribute in attributes_sequence + ) + for attribute in attributes_sequence: + name_length = len(attribute.identifier.token.value) + spaces_to_add = max_length - name_length + attribute.children[1].set_value( + " " * spaces_to_add + attribute.children[1].value + ) + attributes_sequence = [] + + def _vertically_align_object_elems(self, rule: ObjectRule): + max_length = max(len(elem.key.serialize()) for elem in rule.elements) + for elem in rule.elements: + key_length = len(elem.key.serialize()) + print(elem.key.serialize(), key_length) + + spaces_to_add = max_length - key_length + + separator = elem.children[1] + if isinstance(separator, COLON): + spaces_to_add += 1 + + elem.children[1].set_value(" " * spaces_to_add + separator.value) + + def _move_to_next_line(self, times: int = 1): + self._current_line += times + + def _increase_indent_level(self, times: int = 1): + self._current_indent_level += times + + def 
_decrease_indent_level(self, times: int = 1): + self._current_indent_level -= times + if self._current_indent_level < 0: + self._current_indent_level = 0 + + def _build_newline( + self, next_line_indent: int = 0, count: int = 1 + ) -> NewLineOrCommentRule: + result = NewLineOrCommentRule( + [ + NL_OR_COMMENT( + ("\n" * count) + " " * self.options.indent_length * next_line_indent + ) + ] + ) + self._last_new_line = result + return result + + def _deindent_last_line(self, times: int = 1): + token = self._last_new_line.token + for i in range(times): + if token.value.endswith(" " * self.options.indent_length): + token.set_value(token.value[: -self.options.indent_length]) + + # def _build_meta(self, indent_level: int = 0, length: int = 0) -> Meta: + # result = Meta() + # result.empty = length == 0 + # result.line = self._current_line + # result.column = indent_level * self.options.indent_length + # # result.start_pos = + # # result.end_line = + # # result.end_column = + # # result.end_pos = + # # result.orig_expansion = + # # result.match_tree = + # return result diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py new file mode 100644 index 00000000..7d316b2c --- /dev/null +++ b/hcl2/rule_transformer/reconstructor.py @@ -0,0 +1,204 @@ +from typing import List, Union + +from lark import Tree, Token +from hcl2.rule_transformer.rules import tokens +from hcl2.rule_transformer.rules.base import BlockRule +from hcl2.rule_transformer.rules.for_expressions import ForIntroRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.strings import StringRule +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ConditionalRule + + +class HCLReconstructor: + """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" + + def __init__(self): + self._reset_state() + + def _reset_state(self): + """State tracking for formatting decisions""" + 
self._last_was_space = True + self._current_indent = 0 + self._last_token_name = None + self._last_rule_name = None + self._in_parentheses = False + self._in_object = False + self._in_tuple = False + + def _should_add_space_before( + self, current_node: Union[Tree, Token], parent_rule_name: str = None + ) -> bool: + """Determine if we should add a space before the current token/rule.""" + + # Don't add space if we already have one + if self._last_was_space: + return False + + # Don't add space at the beginning + if self._last_token_name is None: + return False + + if isinstance(current_node, Token): + token_type = current_node.type + + # Space before '{' in blocks + if ( + token_type == tokens.LBRACE.lark_name() + and parent_rule_name == BlockRule.lark_name() + ): + return True + + # Space around Conditional Expression operators + if ( + parent_rule_name == ConditionalRule.lark_name() + and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + or self._last_token_name + in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + ): + return True + + # Space after + if ( + parent_rule_name == ForIntroRule.lark_name() + and token_type == tokens.COLON.lark_name() + ): + + return True + + # Space after commas in tuples and function arguments... + if self._last_token_name == tokens.COMMA.lark_name(): + # ... 
except for last comma + if token_type == tokens.RSQB.lark_name(): + return False + return True + + if token_type in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + tokens.ELLIPSIS.lark_name(), + ]: + return True + + if ( + self._last_token_name + in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + ] + and token_type != "NL_OR_COMMENT" + ): + return True + + # Space around for_object arrow + if tokens.FOR_OBJECT_ARROW.lark_name() in [ + token_type, + self._last_token_name, + ]: + return True + + # Space after ellipsis in function arguments + if self._last_token_name == tokens.ELLIPSIS.lark_name(): + return True + + if tokens.EQ.lark_name() in [token_type, self._last_token_name]: + return True + + # space around binary operators + if tokens.BINARY_OP.lark_name() in [token_type, self._last_token_name]: + return True + + elif isinstance(current_node, Tree): + rule_name = current_node.data + + if parent_rule_name == BlockRule.lark_name(): + # Add space between multiple string/identifier labels in blocks + if rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ] and self._last_rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ]: + return True + + return False + + def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: + """Recursively reconstruct a Tree node into HCL text fragments.""" + result = [] + rule_name = tree.data + + if rule_name == ExprTermRule.lark_name(): + # Check if parenthesized + if ( + len(tree.children) >= 3 + and isinstance(tree.children[0], Token) + and tree.children[0].type == tokens.LPAR.lark_name() + and isinstance(tree.children[-1], Token) + and tree.children[-1].type == tokens.RPAR.lark_name() + ): + self._in_parentheses = True + + for child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + self._in_parentheses = False + + else: + for child in tree.children: + 
result.extend(self._reconstruct_node(child, rule_name)) + + if self._should_add_space_before(tree, parent_rule_name): + result.insert(0, " ") + + # Update state tracking + self._last_rule_name = rule_name + if result: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: + """Reconstruct a Token node into HCL text fragments.""" + result = str(token.value) + if self._should_add_space_before(token, parent_rule_name): + result = " " + result + + self._last_token_name = token.type + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_node( + self, node: Union[Tree, Token], parent_rule_name: str = None + ) -> List[str]: + """Reconstruct any node (Tree or Token) into HCL text fragments.""" + if isinstance(node, Tree): + return self._reconstruct_tree(node, parent_rule_name) + elif isinstance(node, Token): + return [self._reconstruct_token(node, parent_rule_name)] + else: + # Fallback: convert to string + return [str(node)] + + def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: + """Convert a Lark.Tree AST back into a string representation of HCL.""" + # Reset state + self._reset_state() + + # Reconstruct the tree + fragments = self._reconstruct_node(tree) + + # Join fragments and apply post-processing + result = "".join(fragments) + + if postproc: + result = postproc(result) + + # Ensure file ends with newline + if result and not result.endswith("\n"): + result += "\n" + + return result diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 33dcc9ca..e83fed2b 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -49,6 +49,9 @@ def serialize_conversion(self) -> Callable: def value(self): return self._value + def set_value(self, value: Any): + self._value = value + 
def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: @@ -96,7 +99,7 @@ def to_lark(self) -> Tree: def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): super().__init__() self._children = children - self._meta = meta + self._meta = meta or Meta() for index, child in enumerate(children): if child is not None: diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 5c8468d4..c879b772 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -5,10 +5,10 @@ from hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.tokens import NAME, EQ +from hcl2.rule_transformer.rules.tokens import NAME, EQ, LBRACE, RBRACE from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext @@ -16,9 +16,9 @@ class AttributeRule(LarkRule): _children: Tuple[ - NAME, + IdentifierRule, EQ, - ExpressionRule, + ExprTermRule, ] @staticmethod @@ -26,11 +26,11 @@ def lark_name() -> str: return "attribute" @property - def identifier(self) -> NAME: + def identifier(self) -> IdentifierRule: return self._children[0] @property - def expression(self) -> ExpressionRule: + def expression(self) -> ExprTermRule: return self._children[2] def serialize( @@ -56,40 +56,32 @@ def lark_name() -> str: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - blocks: List[BlockRule] = [] - attributes: List[AttributeRule] = [] + attribute_names = set() comments = [] inline_comments = [] + result = 
defaultdict(list) + for child in self._children: if isinstance(child, BlockRule): - blocks.append(child) + name = child.labels[0].serialize(options) + if name in attribute_names: + raise RuntimeError(f"Attribute {name} is already defined.") + result[name].append(child.serialize(options)) if isinstance(child, AttributeRule): - attributes.append(child) - # collect in-line comments from attribute assignments, expressions etc - inline_comments.extend(child.expression.inline_comments()) + attribute_names.add(child) + result.update(child.serialize(options)) + if options.with_comments: + # collect in-line comments from attribute assignments, expressions etc + inline_comments.extend(child.expression.inline_comments()) - if isinstance(child, NewLineOrCommentRule): + if isinstance(child, NewLineOrCommentRule) and options.with_comments: child_comments = child.to_list() if child_comments: comments.extend(child_comments) - result = {} - - for attribute in attributes: - result.update(attribute.serialize(options)) - - result_blocks = defaultdict(list) - for block in blocks: - name = block.labels[0].serialize(options) - if name in result.keys(): - raise RuntimeError(f"Attribute {name} is already defined.") - result_blocks[name].append(block.serialize(options)) - - result.update(**result_blocks) - if options.with_comments: if comments: result["__comments__"] = comments @@ -122,7 +114,9 @@ class BlockRule(LarkRule): _children: Tuple[ IdentifierRule, Optional[Union[IdentifierRule, StringRule]], + LBRACE, BodyRule, + RBRACE, ] def __init__(self, children, meta: Optional[Meta] = None): diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index 11ac0f5e..b82abc58 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -14,13 +14,22 @@ EQ, LBRACE, COMMA, - RBRACE, LSQB, RSQB, LPAR, RPAR, DOT, + RBRACE, + LSQB, + RSQB, + LPAR, + RPAR, + DOT, ) from hcl2.rule_transformer.rules.whitespace import 
( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) class TupleRule(InlineCommentMixIn): @@ -33,7 +42,7 @@ class TupleRule(InlineCommentMixIn): Optional[NewLineOrCommentRule], COMMA, Optional[NewLineOrCommentRule], - ... + # ... ], ExpressionRule, Optional[NewLineOrCommentRule], @@ -52,14 +61,18 @@ def elements(self) -> List[ExpressionRule]: child for child in self.children[1:-1] if isinstance(child, ExpressionRule) ] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - if not options.wrap_tuples: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + if not options.wrap_tuples and not context.inside_dollar_string: return [element.serialize(options, context) for element in self.elements] with context.modify(inside_dollar_string=True): - result = f"[{", ".join( + result = "[" + result += ", ".join( str(element.serialize(options, context)) for element in self.elements - )}]" + ) + result += "]" if not context.inside_dollar_string: result = to_dollar_string(result) @@ -81,7 +94,9 @@ def lark_name() -> str: def value(self) -> key_T: return self._children[0] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.value.serialize(options, context) @@ -93,7 +108,6 @@ class ObjectElemKeyExpressionRule(LarkRule): RPAR, ] - @staticmethod def lark_name() -> str: return "object_elem_key_expression" @@ -102,7 +116,9 @@ def lark_name() -> str: def expression(self) -> ExpressionRule: return self._children[1] - def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + def serialize( + self, 
options=SerializationOptions(), context=SerializationContext() + ) -> Any: with context.modify(inside_dollar_string=True): result = f"({self.expression.serialize(options, context)})" if not context.inside_dollar_string: @@ -117,8 +133,7 @@ class ObjectElemKeyDotAccessor(LarkRule): Tuple[ IdentifierRule, DOT, - ... - ] + ], ] @staticmethod @@ -129,8 +144,12 @@ def lark_name() -> str: def identifiers(self) -> List[IdentifierRule]: return [child for child in self._children if isinstance(child, IdentifierRule)] - def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: - return ".".join(identifier.serialize(options, context) for identifier in self.identifiers) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return ".".join( + identifier.serialize(options, context) for identifier in self.identifiers + ) class ObjectElemRule(LarkRule): @@ -153,9 +172,13 @@ def key(self) -> ObjectElemKeyRule: def expression(self): return self._children[2] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return { - self.key.serialize(options, context): self.expression.serialize(options, context) + self.key.serialize(options, context): self.expression.serialize( + options, context + ) } @@ -169,7 +192,6 @@ class ObjectRule(InlineCommentMixIn): Optional[NewLineOrCommentRule], Optional[COMMA], Optional[NewLineOrCommentRule], - ... 
], RBRACE, ] @@ -184,8 +206,10 @@ def elements(self) -> List[ObjectElemRule]: child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) ] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - if not options.wrap_objects: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + if not options.wrap_objects and not context.inside_dollar_string: result = {} for element in self.elements: result.update(element.serialize(options, context)) @@ -193,12 +217,13 @@ def serialize(self, options = SerializationOptions(), context = SerializationCon return result with context.modify(inside_dollar_string=True): - result = f"{{{", ".join( + result = "{" + result += ", ".join( f"{element.key.serialize(options, context)} = {element.expression.serialize(options,context)}" for element in self.elements - )}}}" + ) + result += "}" if not context.inside_dollar_string: result = to_dollar_string(result) - return result diff --git a/hcl2/rule_transformer/rules/expressions.py b/hcl2/rule_transformer/rules/expressions.py index d89f3b3c..0e0c9be8 100644 --- a/hcl2/rule_transformer/rules/expressions.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -16,7 +16,6 @@ from hcl2.rule_transformer.utils import ( wrap_into_parentheses, to_dollar_string, - unwrap_dollar_string, SerializationOptions, SerializationContext, ) @@ -58,7 +57,7 @@ def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = True else: children = [None, *children, None] - self._possibly_insert_null_comments(children, [1, 3]) + self._insert_optionals(children, [1, 3]) super().__init__(children, meta) @property @@ -100,7 +99,7 @@ def lark_name() -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [2, 4, 6]) + self._insert_optionals(children, [2, 4, 6]) super().__init__(children, meta) @property @@ -118,7 +117,7 @@ def 
if_false(self) -> ExpressionRule: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - with context.modify(inside_dollar_string=False): + with context.modify(inside_dollar_string=True): result = ( f"{self.condition.serialize(options, context)} " f"? {self.if_true.serialize(options, context)} " @@ -144,7 +143,7 @@ def lark_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1]) + self._insert_optionals(children, [1]) super().__init__(children, meta) @property diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rule_transformer/rules/for_expressions.py index 18abe6c8..3a89aba3 100644 --- a/hcl2/rule_transformer/rules/for_expressions.py +++ b/hcl2/rule_transformer/rules/for_expressions.py @@ -52,18 +52,23 @@ def lark_name() -> str: return "for_intro" def __init__(self, children, meta: Optional[Meta] = None): - # Insert null comments at positions where they might be missing - self._possibly_insert_null_second_identifier(children) - self._possibly_insert_null_comments(children, [1, 5, 7, 9, 11]) + + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_second_identifier(self, children: List[LarkRule]): - second_identifier_present = ( - len([child for child in children if isinstance(child, IdentifierRule)]) == 2 - ) - if not second_identifier_present: - children.insert(3, None) - children.insert(4, None) + def _insert_optionals(self, children: List, indexes: List[int] = None): + identifiers = [child for child in children if isinstance(child, IdentifierRule)] + second_identifier = identifiers[1] if len(identifiers) == 2 else None + + indexes = [1, 5, 7, 9, 11] + if second_identifier is None: + indexes.extend([3, 4]) + + super()._insert_optionals(children, sorted(indexes)) + + if second_identifier is not None: + children[3] = COMMA() + children[4] = second_identifier @property def 
first_iterator(self) -> IdentifierRule: @@ -90,7 +95,6 @@ def serialize( result += f", {self.second_iterator.serialize(options, context)}" result += f" in {self.iterable.serialize(options, context)} : " - return result @@ -108,7 +112,7 @@ def lark_name() -> str: return "for_cond" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1]) + self._insert_optionals(children, [1]) super().__init__(children, meta) @property @@ -142,13 +146,25 @@ def lark_name() -> str: return "for_tuple_expr" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 5, 7]) - self._possibly_insert_null_condition(children) + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_condition(self, children: List[LarkElement]): - if not len([child for child in children if isinstance(child, ForCondRule)]): - children.insert(6, None) + def _insert_optionals(self, children: List, indexes: List[int] = None): + condition = None + + for child in children: + if isinstance(child, ForCondRule): + condition = child + break + + indexes = [1, 3, 5, 7] + + if condition is None: + indexes.append(6) + + super()._insert_optionals(children, sorted(indexes)) + + children[6] = condition @property def for_intro(self) -> ForIntroRule: @@ -209,30 +225,30 @@ def lark_name() -> str: return "for_object_expr" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 6, 8, 10, 12]) - self._possibly_insert_null_optionals(children) + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_optionals(self, children: List[LarkElement]): - has_ellipsis = False - has_condition = False + def _insert_optionals(self, children: List, indexes: List[int] = None): + ellipsis_ = None + condition = None for child in children: - # if not has_ellipsis and isinstance(child, ELLIPSIS): - if ( - 
has_ellipsis is False - and child is not None - and child.lark_name() == ELLIPSIS.lark_name() - ): - has_ellipsis = True - if not has_condition and isinstance(child, ForCondRule): - has_condition = True - - if not has_ellipsis: - children.insert(9, None) - - if not has_condition: - children.insert(11, None) + if ellipsis_ is None and isinstance(child, ELLIPSIS): + ellipsis_ = child + if condition is None and isinstance(child, ForCondRule): + condition = child + + indexes = [1, 3, 6, 8, 10, 12] + + if ellipsis_ is None: + indexes.append(9) + if condition is None: + indexes.append(11) + + super()._insert_optionals(children, sorted(indexes)) + + children[9] = ellipsis_ + children[11] = condition @property def for_intro(self) -> ForIntroRule: @@ -262,6 +278,7 @@ def condition(self) -> Optional[ForCondRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + result = "{" with context.modify(inside_dollar_string=True): result += self.for_intro.serialize(options, context) @@ -270,7 +287,6 @@ def serialize( result += self.value_expr.serialize( SerializationOptions(wrap_objects=True), context ) - if self.ellipsis is not None: result += self.ellipsis.serialize(options, context) diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index b25fed62..9e52a47b 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -4,8 +4,15 @@ from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR -from hcl2.rule_transformer.rules.whitespace import InlineCommentMixIn, NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string +from hcl2.rule_transformer.rules.whitespace import ( + InlineCommentMixIn, + NewLineOrCommentRule, +) +from 
hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) class ArgumentsRule(InlineCommentMixIn): @@ -17,7 +24,7 @@ class ArgumentsRule(InlineCommentMixIn): COMMA, Optional[NewLineOrCommentRule], ExpressionRule, - ... + # ... ], Optional[Union[COMMA, ELLIPSIS]], Optional[NewLineOrCommentRule], @@ -39,8 +46,12 @@ def has_ellipsis(self) -> bool: def arguments(self) -> List[ExpressionRule]: return [child for child in self._children if isinstance(child, ExpressionRule)] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ", ".join([str(argument.serialize(options, context)) for argument in self.arguments]) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = ", ".join( + [str(argument.serialize(options, context)) for argument in self.arguments] + ) if self.has_ellipsis: result += " ..." return result @@ -75,30 +86,32 @@ def arguments(self) -> Optional[ArgumentsRule]: if isinstance(child, ArgumentsRule): return child - - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ( - f"{"::".join(identifier.serialize(options, context) for identifier in self.identifiers)}" - f"({self.arguments.serialize(options, context) if self.arguments else ""})" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" + result += ( + f"({self.arguments.serialize(options, context) if self.arguments else ''})" ) + if not context.inside_dollar_string: result = to_dollar_string(result) return result -# class ProviderFunctionCallRule(FunctionCallRule): -# _children: Tuple[ -# IdentifierRule, -# IdentifierRule, -# IdentifierRule, -# LPAR, -# Optional[NewLineOrCommentRule], -# Optional[ArgumentsRule], -# Optional[NewLineOrCommentRule], -# 
RPAR, -# ] -# -# @staticmethod -# def lark_name() -> str: -# return "provider_function_call" +class ProviderFunctionCallRule(FunctionCallRule): + _children: Tuple[ + IdentifierRule, + IdentifierRule, + IdentifierRule, + LPAR, + Optional[NewLineOrCommentRule], + Optional[ArgumentsRule], + Optional[NewLineOrCommentRule], + RPAR, + ] + + @staticmethod + def lark_name() -> str: + return "provider_function_call" diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py index 7a9b53a5..20decf00 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rule_transformer/rules/indexing.py @@ -67,7 +67,7 @@ def serialize( return f"[{self.index_expression.serialize(options)}]" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3]) + self._insert_optionals(children, [1, 3]) super().__init__(children, meta) diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 67d53fcf..ba948d3e 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -37,6 +37,9 @@ def serialize_conversion(self) -> Callable[[Any], str]: class StaticStringToken(LarkToken): + + classes_by_value = {} + @classmethod @lru_cache(maxsize=None) def __build_subclass( @@ -44,7 +47,7 @@ def __build_subclass( ) -> Type["StringToken"]: """Create a subclass with a constant `lark_name`.""" - return type( # type: ignore + result = type( # type: ignore f"{name}_TOKEN", (cls,), { @@ -53,6 +56,8 @@ def __build_subclass( "_default_value": default_value, }, ) + cls.classes_by_value[default_value] = result + return result def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StringToken"]: name, default_value = value @@ -72,8 +77,9 @@ def serialize_conversion(self) -> Callable[[Any], str]: STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] 
-HEREDOC_TEMPLATE = STRING_CHARS["HEREDOC_TEMPLATE"] -HEREDOC_TRIM_TEMPLATE = STRING_CHARS["HEREDOC_TRIM_TEMPLATE"] +HEREDOC_TEMPLATE = StringToken["HEREDOC_TEMPLATE"] +HEREDOC_TRIM_TEMPLATE = StringToken["HEREDOC_TRIM_TEMPLATE"] +NL_OR_COMMENT = StringToken["NL_OR_COMMENT"] # static values EQ = StaticStringToken[("EQ", "=")] COLON = StaticStringToken[("COLON", ":")] diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index fa24355c..62069b78 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -3,7 +3,7 @@ from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule from hcl2.rule_transformer.rules.literal_rules import TokenRule -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class NewLineOrCommentRule(TokenRule): @@ -15,6 +15,11 @@ def lark_name() -> str: def from_string(cls, string: str) -> "NewLineOrCommentRule": return cls([LarkToken("NL_OR_COMMENT", string)]) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return self.token.serialize() + def to_list( self, options: SerializationOptions = SerializationOptions() ) -> Optional[List[str]]: @@ -43,7 +48,7 @@ def to_list( class InlineCommentMixIn(LarkRule, ABC): - def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + def _insert_optionals(self, children: List, indexes: List[int] = None): for index in indexes: try: child = children[index] diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 1ab1dfda..931eab8e 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -60,6 +60,7 @@ IntLiteral, FloatLiteral, StringToken, + StaticStringToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -80,6 +81,8 @@ def __init__(self, 
discard_new_line_or_comments: bool = False): def __default_token__(self, token: Token) -> StringToken: # TODO make this return StaticStringToken where applicable + if token.value in StaticStringToken.classes_by_value.keys(): + return StaticStringToken.classes_by_value[token.value]() return StringToken[token.type](token.value) def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 98370ca3..8f1d7352 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -7,7 +7,6 @@ HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) - @dataclass class SerializationOptions: with_comments: bool = True @@ -18,11 +17,6 @@ class SerializationOptions: preserve_heredocs: bool = True -@dataclass -class DeserializationOptions: - heredocs_to_strings: bool = False - - @dataclass class SerializationContext: inside_dollar_string: bool = False From 5ccfa657f28f152ea338c03d36508e365046c6f7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Fri, 12 Dec 2025 14:09:37 +0100 Subject: [PATCH 11/42] * HCLReconstructor._reconstruct_token - handle 0 length tokens * FunctionCallRule.serialize - properly serialize into dollar string * remove unused import --- hcl2/rule_transformer/reconstructor.py | 3 ++- hcl2/rule_transformer/rules/containers.py | 1 - hcl2/rule_transformer/rules/functions.py | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py index 7d316b2c..6aa8a4a4 100644 --- a/hcl2/rule_transformer/reconstructor.py +++ b/hcl2/rule_transformer/reconstructor.py @@ -167,7 +167,8 @@ def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: result = " " + result self._last_token_name = token.type - self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + if len(token) != 0: + self._last_was_space = result[-1].endswith(" 
") or result[-1].endswith("\n") return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index b82abc58..a2f53436 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -1,4 +1,3 @@ -import json from typing import Tuple, List, Optional, Union, Any from hcl2.rule_transformer.rules.abstract import LarkRule diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 9e52a47b..92cc8b11 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -89,10 +89,9 @@ def arguments(self) -> Optional[ArgumentsRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" - result += ( - f"({self.arguments.serialize(options, context) if self.arguments else ''})" - ) + with context.modify(inside_dollar_string=True): + result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" + result += f"({self.arguments.serialize(options, context) if self.arguments else ''})" if not context.inside_dollar_string: result = to_dollar_string(result) From ca192325cc03a72618773cf31199b53c27e24774 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 21 Feb 2026 14:33:09 +0100 Subject: [PATCH 12/42] fix operator precedence --- hcl2/rule_transformer/hcl2.lark | 60 +++++++++++++++++++--- hcl2/rule_transformer/reconstructor.py | 41 +++++++++++++-- hcl2/rule_transformer/rules/expressions.py | 55 +++++++++++++++++--- hcl2/rule_transformer/utils.py | 2 + 4 files changed, 138 insertions(+), 20 deletions(-) diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index 24140ada..63154efb 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -24,7 +24,6 @@ FLOAT_LITERAL: (NEGATIVE_DECIMAL? 
DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EX | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) // Operators -BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS DOUBLE_EQ : "==" NEQ : "!=" LT : "<" @@ -99,16 +98,61 @@ string_part: STRING_CHARS | interpolation // Expressions -?expression : expr_term | operation | conditional +?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -> conditional + | or_expr interpolation: INTERP_START expression RBRACE -conditional : expression QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -// Operations -?operation : unary_op | binary_op +// Operator precedence ladder (lowest to highest) +// Each level uses left recursion for left-associativity. +// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain +// transformer compatibility with BinaryOpRule / BinaryTermRule / BinaryOperatorRule. + +// Logical OR +?or_expr : or_expr or_binary_term new_line_or_comment? -> binary_op + | and_expr +or_binary_term : or_binary_operator new_line_or_comment? and_expr -> binary_term +!or_binary_operator : DOUBLE_PIPE -> binary_operator + +// Logical AND +?and_expr : and_expr and_binary_term new_line_or_comment? -> binary_op + | eq_expr +and_binary_term : and_binary_operator new_line_or_comment? eq_expr -> binary_term +!and_binary_operator : DOUBLE_AMP -> binary_operator + +// Equality +?eq_expr : eq_expr eq_binary_term new_line_or_comment? -> binary_op + | rel_expr +eq_binary_term : eq_binary_operator new_line_or_comment? rel_expr -> binary_term +!eq_binary_operator : DOUBLE_EQ -> binary_operator + | NEQ -> binary_operator + +// Relational +?rel_expr : rel_expr rel_binary_term new_line_or_comment? -> binary_op + | add_expr +rel_binary_term : rel_binary_operator new_line_or_comment? 
add_expr -> binary_term +!rel_binary_operator : LT -> binary_operator + | GT -> binary_operator + | LEQ -> binary_operator + | GEQ -> binary_operator + +// Additive +?add_expr : add_expr add_binary_term new_line_or_comment? -> binary_op + | mul_expr +add_binary_term : add_binary_operator new_line_or_comment? mul_expr -> binary_term +!add_binary_operator : PLUS -> binary_operator + | MINUS -> binary_operator + +// Multiplicative +?mul_expr : mul_expr mul_binary_term new_line_or_comment? -> binary_op + | unary_expr +mul_binary_term : mul_binary_operator new_line_or_comment? unary_expr -> binary_term +!mul_binary_operator : ASTERISK -> binary_operator + | SLASH -> binary_operator + | PERCENT -> binary_operator + +// Unary (highest precedence for operations) +?unary_expr : unary_op | expr_term !unary_op : (MINUS | NOT) expr_term -binary_op : expression binary_term new_line_or_comment? -binary_term : binary_operator new_line_or_comment? expression -!binary_operator : BINARY_OP // Expression terms expr_term : LPAR new_line_or_comment? expression new_line_or_comment? 
RPAR diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py index 6aa8a4a4..099beead 100644 --- a/hcl2/rule_transformer/reconstructor.py +++ b/hcl2/rule_transformer/reconstructor.py @@ -6,12 +6,32 @@ from hcl2.rule_transformer.rules.for_expressions import ForIntroRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.expressions import ExprTermRule, ConditionalRule +from hcl2.rule_transformer.rules.expressions import ( + ExprTermRule, + ConditionalRule, + UnaryOpRule, +) class HCLReconstructor: """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" + _binary_op_types = { + "DOUBLE_EQ", + "NEQ", + "LT", + "GT", + "LEQ", + "GEQ", + "MINUS", + "ASTERISK", + "SLASH", + "PERCENT", + "DOUBLE_AMP", + "DOUBLE_PIPE", + "PLUS", + } + def __init__(self): self._reset_state() @@ -105,8 +125,14 @@ def _should_add_space_before( if tokens.EQ.lark_name() in [token_type, self._last_token_name]: return True - # space around binary operators - if tokens.BINARY_OP.lark_name() in [token_type, self._last_token_name]: + # Don't add space around operator tokens inside unary_op + if parent_rule_name == UnaryOpRule.lark_name(): + return False + + if ( + token_type in self._binary_op_types + or self._last_token_name in self._binary_op_types + ): return True elif isinstance(current_node, Tree): @@ -130,7 +156,14 @@ def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[st result = [] rule_name = tree.data - if rule_name == ExprTermRule.lark_name(): + if rule_name == UnaryOpRule.lark_name(): + for i, child in enumerate(tree.children): + result.extend(self._reconstruct_node(child, rule_name)) + if i == 0: + # Suppress space between unary operator and its operand + self._last_was_space = True + + elif rule_name == ExprTermRule.lark_name(): # Check if parenthesized if ( 
len(tree.children) >= 3 diff --git a/hcl2/rule_transformer/rules/expressions.py b/hcl2/rule_transformer/rules/expressions.py index 0e0c9be8..db256e82 100644 --- a/hcl2/rule_transformer/rules/expressions.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -26,8 +26,30 @@ class ExpressionRule(InlineCommentMixIn, ABC): def lark_name() -> str: return "expression" - def __init__(self, children, meta: Optional[Meta] = None): + def __init__( + self, children, meta: Optional[Meta] = None, parentheses: bool = False + ): super().__init__(children, meta) + self._parentheses = parentheses + + def _wrap_into_parentheses( + self, value: str, options=SerializationOptions(), context=SerializationContext() + ) -> str: + # do not wrap into parentheses if + # 1. already wrapped or + # 2. is top-level expression (unless explicitly wrapped) + if context.inside_parentheses: + return value + # Look through ExprTermRule wrapper to determine if truly nested + parent = getattr(self, "parent", None) + if parent is None: + return value + if isinstance(parent, ExprTermRule): + if not isinstance(parent.parent, ExpressionRule): + return value + elif not isinstance(parent, ExpressionRule): + return value + return wrap_into_parentheses(value) class ExprTermRule(ExpressionRule): @@ -47,18 +69,18 @@ def lark_name() -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): - self._parentheses = False + parentheses = False if ( isinstance(children[0], LarkToken) and children[0].lark_name() == "LPAR" and isinstance(children[-1], LarkToken) and children[-1].lark_name() == "RPAR" ): - self._parentheses = True + parentheses = True else: children = [None, *children, None] self._insert_optionals(children, [1, 3]) - super().__init__(children, meta) + super().__init__(children, meta, parentheses) @property def parentheses(self) -> bool: @@ -71,7 +93,10 @@ def expression(self) -> ExpressionRule: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) 
-> Any: - result = self.expression.serialize(options, context) + with context.modify( + inside_parentheses=self.parentheses or context.inside_parentheses + ): + result = self.expression.serialize(options, context) if self.parentheses: result = wrap_into_parentheses(result) @@ -127,6 +152,9 @@ def serialize( if not context.inside_dollar_string: result = to_dollar_string(result) + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) + return result @@ -192,6 +220,9 @@ def serialize( if not context.inside_dollar_string: result = to_dollar_string(result) + + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) return result @@ -214,6 +245,14 @@ def expr_term(self): def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return to_dollar_string( - f"{self.operator}{self.expr_term.serialize(options, context)}" - ) + + with context.modify(inside_dollar_string=True): + result = f"{self.operator}{self.expr_term.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + if options.force_operation_parentheses: + result = self._wrap_into_parentheses(result, options, context) + + return result diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 8f1d7352..68c32ebc 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -15,11 +15,13 @@ class SerializationOptions: wrap_tuples: bool = False explicit_blocks: bool = True preserve_heredocs: bool = True + force_operation_parentheses: bool = False @dataclass class SerializationContext: inside_dollar_string: bool = False + inside_parentheses: bool = False def replace(self, **kwargs) -> "SerializationContext": return replace(self, **kwargs) From fc49bad9b819f5ce89ea5ed876880248c4f621b9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 16:20:06 +0100 Subject: [PATCH 13/42] 
reorganize new and old code --- hcl2/__init__.py | 1 - hcl2/api.py | 21 +- hcl2/{rule_transformer => }/deserializer.py | 20 +- hcl2/dict_transformer.py | 403 -------- hcl2/{rule_transformer => }/editor.py | 0 hcl2/{rule_transformer => }/formatter.py | 14 +- hcl2/hcl2.lark | 207 ++-- hcl2/parser.py | 24 +- hcl2/{rule_transformer => }/processor.py | 0 hcl2/py.typed | 0 hcl2/reconstructor.py | 913 ++++-------------- hcl2/rule_transformer/hcl2.lark | 207 ---- hcl2/rule_transformer/json.py | 12 - hcl2/rule_transformer/reconstructor.py | 238 ----- hcl2/rule_transformer/rules/__init__.py | 0 hcl2/{rule_transformer => rules}/__init__.py | 0 hcl2/{rule_transformer => }/rules/abstract.py | 2 +- hcl2/{rule_transformer => }/rules/base.py | 16 +- .../rules/containers.py | 14 +- .../rules/expressions.py | 10 +- .../rules/for_expressions.py | 12 +- .../{rule_transformer => }/rules/functions.py | 10 +- hcl2/{rule_transformer => }/rules/indexing.py | 12 +- .../rules/literal_rules.py | 4 +- hcl2/{rule_transformer => }/rules/strings.py | 8 +- hcl2/{rule_transformer => }/rules/tokens.py | 2 +- hcl2/{rule_transformer => }/rules/tree.py | 0 .../rules/whitespace.py | 6 +- hcl2/{rule_transformer => }/transformer.py | 20 +- hcl2/{rule_transformer => }/utils.py | 0 30 files changed, 442 insertions(+), 1734 deletions(-) rename hcl2/{rule_transformer => }/deserializer.py (94%) delete mode 100644 hcl2/dict_transformer.py rename hcl2/{rule_transformer => }/editor.py (100%) rename hcl2/{rule_transformer => }/formatter.py (94%) rename hcl2/{rule_transformer => }/processor.py (100%) delete mode 100644 hcl2/py.typed delete mode 100644 hcl2/rule_transformer/hcl2.lark delete mode 100644 hcl2/rule_transformer/json.py delete mode 100644 hcl2/rule_transformer/reconstructor.py delete mode 100644 hcl2/rule_transformer/rules/__init__.py rename hcl2/{rule_transformer => rules}/__init__.py (100%) rename hcl2/{rule_transformer => }/rules/abstract.py (97%) rename hcl2/{rule_transformer => }/rules/base.py (88%) 
rename hcl2/{rule_transformer => }/rules/containers.py (93%) rename hcl2/{rule_transformer => }/rules/expressions.py (95%) rename hcl2/{rule_transformer => }/rules/for_expressions.py (95%) rename hcl2/{rule_transformer => }/rules/functions.py (90%) rename hcl2/{rule_transformer => }/rules/indexing.py (94%) rename hcl2/{rule_transformer => }/rules/literal_rules.py (85%) rename hcl2/{rule_transformer => }/rules/strings.py (94%) rename hcl2/{rule_transformer => }/rules/tokens.py (98%) rename hcl2/{rule_transformer => }/rules/tree.py (100%) rename hcl2/{rule_transformer => }/rules/whitespace.py (90%) rename hcl2/{rule_transformer => }/transformer.py (93%) rename hcl2/{rule_transformer => }/utils.py (100%) diff --git a/hcl2/__init__.py b/hcl2/__init__.py index 62f5a198..2d5dad09 100644 --- a/hcl2/__init__.py +++ b/hcl2/__init__.py @@ -11,7 +11,6 @@ parse, parses, transform, - reverse_transform, writes, ) diff --git a/hcl2/api.py b/hcl2/api.py index 1cec02a2..7c384c53 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -2,9 +2,9 @@ from typing import TextIO from lark.tree import Tree -from hcl2.parser import parser, reconstruction_parser -from hcl2.dict_transformer import DictTransformer -from hcl2.reconstructor import HCLReconstructor, HCLReverseTransformer +from hcl2.parser import parser +from hcl2.reconstructor import HCLReconstructor +from hcl2.transformer import RuleTransformer def load(file: TextIO, with_meta=False) -> dict: @@ -27,7 +27,7 @@ def loads(text: str, with_meta=False) -> dict: # This means that all blocks must end in a new line even if the file ends # Append a new line as a temporary fix tree = parser().parse(text + "\n") - return DictTransformer(with_meta=with_meta).transform(tree) + return RuleTransformer().transform(tree) def parse(file: TextIO) -> Tree: @@ -41,7 +41,7 @@ def parses(text: str) -> Tree: """Load HCL2 syntax tree from a string. :param text: Text with hcl2 to be loaded as a dict. 
""" - return reconstruction_parser().parse(text) + return parser().parse(text) def transform(ast: Tree, with_meta=False) -> dict: @@ -50,18 +50,11 @@ def transform(ast: Tree, with_meta=False) -> dict: :param with_meta: If set to true then adds `__start_line__` and `__end_line__` parameters to the output dict. Default to false. """ - return DictTransformer(with_meta=with_meta).transform(ast) - - -def reverse_transform(hcl2_dict: dict) -> Tree: - """Convert a dictionary to an HCL2 AST. - :param hcl2_dict: a dictionary produced by `load` or `transform` - """ - return HCLReverseTransformer().transform(hcl2_dict) + return RuleTransformer().transform(ast) def writes(ast: Tree) -> str: """Convert an HCL2 syntax tree to a string. :param ast: HCL2 syntax tree, output from `parse` or `parses` """ - return HCLReconstructor(reconstruction_parser()).reconstruct(ast) + return HCLReconstructor().reconstruct(ast) diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/deserializer.py similarity index 94% rename from hcl2/rule_transformer/deserializer.py rename to hcl2/deserializer.py index 56e1ad44..2290809c 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/deserializer.py @@ -8,14 +8,14 @@ from hcl2 import parses from hcl2.const import IS_BLOCK -from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule -from hcl2.rule_transformer.rules.base import ( +from hcl2.rules.abstract import LarkElement, LarkRule +from hcl2.rules.base import ( BlockRule, AttributeRule, BodyRule, StartRule, ) -from hcl2.rule_transformer.rules.containers import ( +from hcl2.rules.containers import ( TupleRule, ObjectRule, ObjectElemRule, @@ -23,20 +23,20 @@ ObjectElemKeyDotAccessor, ObjectElemKeyRule, ) -from hcl2.rule_transformer.rules.expressions import ExprTermRule -from hcl2.rule_transformer.rules.literal_rules import ( +from hcl2.rules.expressions import ExprTermRule +from hcl2.rules.literal_rules import ( IdentifierRule, IntLitRule, FloatLitRule, ) -from 
hcl2.rule_transformer.rules.strings import ( +from hcl2.rules.strings import ( StringRule, InterpolationRule, StringPartRule, HeredocTemplateRule, HeredocTrimTemplateRule, ) -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.tokens import ( NAME, EQ, DBLQUOTE, @@ -55,9 +55,9 @@ HEREDOC_TEMPLATE, COLON, ) -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.transformer import RuleTransformer -from hcl2.rule_transformer.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.transformer import RuleTransformer +from hcl2.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN @dataclass diff --git a/hcl2/dict_transformer.py b/hcl2/dict_transformer.py deleted file mode 100644 index 64c58bcb..00000000 --- a/hcl2/dict_transformer.py +++ /dev/null @@ -1,403 +0,0 @@ -"""A Lark Transformer for transforming a Lark parse tree into a Python dict""" -import json -import re -import sys -from collections import namedtuple -from typing import List, Dict, Any - -from lark import Token -from lark.tree import Meta -from lark.visitors import Transformer, Discard, _DiscardType, v_args - -from .reconstructor import reverse_quotes_within_interpolation - - -HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) -HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) - - -START_LINE = "__start_line__" -END_LINE = "__end_line__" - - -Attribute = namedtuple("Attribute", ("key", "value")) - - -# pylint: disable=missing-function-docstring,unused-argument -class DictTransformer(Transformer): - """Takes a syntax tree generated by the parser and - transforms it to a dict. 
- """ - - with_meta: bool - - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} - - def __init__(self, with_meta: bool = False): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self.with_meta = with_meta - super().__init__() - - def float_lit(self, args: List) -> float: - value = "".join([self.to_tf_inline(arg) for arg in args]) - if "e" in value: - return self.to_string_dollar(value) - return float(value) - - def int_lit(self, args: List) -> int: - return int("".join([self.to_tf_inline(arg) for arg in args])) - - def expr_term(self, args: List) -> Any: - args = self.strip_new_line_tokens(args) - - if args[0] == "true": - return True - if args[0] == "false": - return False - if args[0] == "null": - return None - - if args[0] == "(" and args[-1] == ")": - return "".join(str(arg) for arg in args) - - return args[0] - - def index_expr_term(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return f"{args[0]}{args[1]}" - - def index(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return f"[{args[0]}]" - - def get_attr_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def get_attr(self, args: List) -> str: - return f".{args[0]}" - - def attr_splat_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def attr_splat(self, args: List) -> str: - args_str = "".join(self.to_tf_inline(arg) for arg in args) - return f".*{args_str}" - - def full_splat_expr_term(self, args: List) -> str: - return f"{args[0]}{args[1]}" - - def full_splat(self, args: List) -> str: - args_str = "".join(self.to_tf_inline(arg) for arg in args) - return f"[*]{args_str}" - - def tuple(self, args: List) -> List: - return [self.to_string_dollar(arg) for arg in self.strip_new_line_tokens(args)] - - def object_elem(self, args: List) -> Dict: - # This returns a dict with a single 
key/value pair to make it easier to merge these - # into a bigger dict that is returned by the "object" function - - key = str(args[0].children[0]) - if not re.match(r".*?(\${).*}.*", key): - # do not strip quotes of a interpolation string - key = self.strip_quotes(key) - - value = self.to_string_dollar(args[2]) - return {key: value} - - def object_elem_key_dot_accessor(self, args: List) -> str: - return "".join(args) - - def object_elem_key_expression(self, args: List) -> str: - return self.to_string_dollar("".join(args)) - - def object(self, args: List) -> Dict: - args = self.strip_new_line_tokens(args) - result: Dict[str, Any] = {} - for arg in args: - if ( - isinstance(arg, Token) and arg.type == "COMMA" - ): # skip optional comma at the end of object element - continue - - result.update(arg) - return result - - def function_call(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args_str = "" - if len(args) > 1: - args_str = ", ".join( - [self.to_tf_inline(arg) for arg in args[1] if arg is not Discard] - ) - return f"{args[0]}({args_str})" - - def provider_function_call(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args_str = "" - if len(args) > 5: - args_str = ", ".join( - [self.to_tf_inline(arg) for arg in args[5] if arg is not Discard] - ) - provider_func = "::".join([args[0], args[2], args[4]]) - return f"{provider_func}({args_str})" - - def arguments(self, args: List) -> List: - return self.process_nulls(args) - - @v_args(meta=True) - def block(self, meta: Meta, args: List) -> Dict: - *block_labels, block_body = args - result: Dict[str, Any] = block_body - if self.with_meta: - result.update( - { - START_LINE: meta.line, - END_LINE: meta.end_line, - } - ) - - # create nested dict. i.e. 
{label1: {label2: {labelN: result}}} - for label in reversed(block_labels): - label_str = self.strip_quotes(label) - result = {label_str: result} - - return result - - def attribute(self, args: List) -> Attribute: - key = str(args[0]) - if key.startswith('"') and key.endswith('"'): - key = key[1:-1] - value = self.to_string_dollar(args[2]) - return Attribute(key, value) - - def conditional(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args = self.process_nulls(args) - return f"{args[0]} ? {args[1]} : {args[2]}" - - def binary_op(self, args: List) -> str: - return " ".join( - [self.unwrap_string_dollar(self.to_tf_inline(arg)) for arg in args] - ) - - def unary_op(self, args: List) -> str: - args = self.process_nulls(args) - return "".join([self.to_tf_inline(arg) for arg in args]) - - def binary_term(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - args = self.process_nulls(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def body(self, args: List) -> Dict[str, List]: - # See https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#bodies - # --- - # A body is a collection of associated attributes and blocks. - # - # An attribute definition assigns a value to a particular attribute - # name within a body. Each distinct attribute name may be defined no - # more than once within a single body. - # - # A block creates a child body that is annotated with a block type and - # zero or more block labels. Blocks create a structural hierarchy which - # can be interpreted by the calling application. - # --- - # - # There can be more than one child body with the same block type and - # labels. This means that all blocks (even when there is only one) - # should be transformed into lists of blocks. 
- args = self.strip_new_line_tokens(args) - attributes = set() - result: Dict[str, Any] = {} - for arg in args: - if isinstance(arg, Attribute): - if arg.key in result: - raise RuntimeError(f"{arg.key} already defined") - result[arg.key] = arg.value - attributes.add(arg.key) - else: - # This is a block. - for key, value in arg.items(): - key = str(key) - if key in result: - if key in attributes: - raise RuntimeError(f"{key} already defined") - result[key].append(value) - else: - result[key] = [value] - - return result - - def start(self, args: List) -> Dict: - args = self.strip_new_line_tokens(args) - return args[0] - - def binary_operator(self, args: List) -> str: - return str(args[0]) - - def heredoc_template(self, args: List) -> str: - match = HEREDOC_PATTERN.match(str(args[0])) - if not match: - raise RuntimeError(f"Invalid Heredoc token: {args[0]}") - - trim_chars = "\n\t " - result = match.group(2).rstrip(trim_chars) - return f'"{result}"' - - def heredoc_template_trim(self, args: List) -> str: - # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions - # This is a special version of heredocs that are declared with "<<-" - # This will calculate the minimum number of leading spaces in each line of a heredoc - # and then remove that number of spaces from each line - match = HEREDOC_TRIM_PATTERN.match(str(args[0])) - if not match: - raise RuntimeError(f"Invalid Heredoc token: {args[0]}") - - trim_chars = "\n\t " - text = match.group(2).rstrip(trim_chars) - lines = text.split("\n") - - # calculate the min number of leading spaces in each line - min_spaces = sys.maxsize - for line in lines: - leading_spaces = len(line) - len(line.lstrip(" ")) - min_spaces = min(min_spaces, leading_spaces) - - # trim off that number of leading spaces from each line - lines = [line[min_spaces:] for line in lines] - - return '"%s"' % "\n".join(lines) - - def new_line_or_comment(self, args: List) -> _DiscardType: - return Discard - - # def 
EQ(self, args: List): - # print("EQ", args) - # return args - - def for_tuple_expr(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) - return f"[{for_expr}]" - - def for_intro(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def for_cond(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - return " ".join([self.to_tf_inline(arg) for arg in args]) - - def for_object_expr(self, args: List) -> str: - args = self.strip_new_line_tokens(args) - for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) - # doubled curly braces stands for inlining the braces - # and the third pair of braces is for the interpolation - # e.g. f"{2 + 2} {{2 + 2}}" == "4 {2 + 2}" - return f"{{{for_expr}}}" - - def string(self, args: List) -> str: - return '"' + "".join(args) + '"' - - def string_part(self, args: List) -> str: - value = self.to_tf_inline(args[0]) - if value.startswith('"') and value.endswith('"'): - value = value[1:-1] - return value - - def interpolation(self, args: List) -> str: - return '"${' + str(args[0]) + '}"' - - def strip_new_line_tokens(self, args: List) -> List: - """ - Remove new line and Discard tokens. 
- The parser will sometimes include these in the tree so we need to strip them out here - """ - return [arg for arg in args if arg != "\n" and arg is not Discard] - - def is_string_dollar(self, value: str) -> bool: - if not isinstance(value, str): - return False - return value.startswith("${") and value.endswith("}") - - def to_string_dollar(self, value: Any) -> Any: - """Wrap a string in ${ and }""" - if not isinstance(value, str): - return value - # if it's already wrapped, pass it unmodified - if self.is_string_dollar(value): - return value - - if value.startswith('"') and value.endswith('"'): - value = str(value)[1:-1] - return self.process_escape_sequences(value) - - if self.is_type_keyword(value): - return value - - return f"${{{value}}}" - - def unwrap_string_dollar(self, value: str): - if self.is_string_dollar(value): - return value[2:-1] - return value - - def strip_quotes(self, value: Any) -> Any: - """Remove quote characters from the start and end of a string""" - if isinstance(value, str): - if value.startswith('"') and value.endswith('"'): - value = str(value)[1:-1] - return self.process_escape_sequences(value) - return value - - def process_escape_sequences(self, value: str) -> str: - """Process HCL escape sequences within quoted template expressions.""" - if isinstance(value, str): - # normal escape sequences - value = value.replace("\\n", "\n") - value = value.replace("\\r", "\r") - value = value.replace("\\t", "\t") - value = value.replace('\\"', '"') - value = value.replace("\\\\", "\\") - - # we will leave Unicode escapes (\uNNNN and \UNNNNNNNN) untouched - # for now, but this method can be extended in the future - return value - - def process_nulls(self, args: List) -> List: - return ["null" if arg is None else arg for arg in args] - - def to_tf_inline(self, value: Any) -> str: - """ - Converts complex objects (e.g.) 
dicts to an "inline" HCL syntax - for use in function calls and ${interpolation} strings - """ - if isinstance(value, dict): - dict_v = json.dumps(value) - return reverse_quotes_within_interpolation(dict_v) - if isinstance(value, list): - value = [self.to_tf_inline(item) for item in value] - return f"[{', '.join(value)}]" - if isinstance(value, bool): - return "true" if value else "false" - if isinstance(value, str): - return value - if isinstance(value, (int, float)): - return str(value) - if value is None: - return "None" - - raise RuntimeError(f"Invalid type to convert to inline HCL: {type(value)}") - - def identifier(self, value: Any) -> Any: - # Making identifier a token by capitalizing it to IDENTIFIER - # seems to return a token object instead of the str - # So treat it like a regular rule - # In this case we just convert the whole thing to a string - return str(value[0]) diff --git a/hcl2/rule_transformer/editor.py b/hcl2/editor.py similarity index 100% rename from hcl2/rule_transformer/editor.py rename to hcl2/editor.py diff --git a/hcl2/rule_transformer/formatter.py b/hcl2/formatter.py similarity index 94% rename from hcl2/rule_transformer/formatter.py rename to hcl2/formatter.py index ad0247dc..205d2ddd 100644 --- a/hcl2/rule_transformer/formatter.py +++ b/hcl2/formatter.py @@ -2,21 +2,21 @@ from dataclasses import dataclass from typing import List -from hcl2.rule_transformer.rules.abstract import LarkElement -from hcl2.rule_transformer.rules.base import ( +from hcl2.rules.abstract import LarkElement +from hcl2.rules.base import ( StartRule, BlockRule, AttributeRule, BodyRule, ) -from hcl2.rule_transformer.rules.containers import ObjectRule, ObjectElemRule, TupleRule -from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule -from hcl2.rule_transformer.rules.for_expressions import ( +from hcl2.rules.containers import ObjectRule, ObjectElemRule, TupleRule +from hcl2.rules.expressions import ExprTermRule, ExpressionRule +from 
hcl2.rules.for_expressions import ( ForTupleExprRule, ForObjectExprRule, ) -from hcl2.rule_transformer.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA +from hcl2.rules.whitespace import NewLineOrCommentRule @dataclass diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index 78ba3ca6..63154efb 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -1,27 +1,29 @@ -start : body -body : (new_line_or_comment? (attribute | block))* new_line_or_comment? -attribute : identifier EQ expression -block : identifier (identifier | string)* new_line_or_comment? "{" body "}" -new_line_or_comment: ( NL_OR_COMMENT )+ +// ============================================================================ +// Terminals +// ============================================================================ + +// Whitespace and Comments NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ -identifier : NAME | IN | FOR | IF | FOR_EACH -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +// Keywords IF : "if" IN : "in" FOR : "for" FOR_EACH : "for_each" -?expression : expr_term | operation | conditional -conditional : expression "?" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment? expression +// Literals +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ +STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ +DECIMAL : "0".."9" +NEGATIVE_DECIMAL : "-" DECIMAL +EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ +INT_LITERAL: NEGATIVE_DECIMAL? DECIMAL+ +FLOAT_LITERAL: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? + | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) -?operation : unary_op | binary_op -!unary_op : ("-" | "!") expr_term -binary_op : expression binary_term new_line_or_comment? 
-!binary_operator : BINARY_OP -binary_term : binary_operator new_line_or_comment? expression -BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS +// Operators DOUBLE_EQ : "==" NEQ : "!=" LT : "<" @@ -35,74 +37,171 @@ PERCENT : "%" DOUBLE_AMP : "&&" DOUBLE_PIPE : "||" PLUS : "+" +NOT : "!" +QMARK : "?" + +// Punctuation LPAR : "(" RPAR : ")" +LBRACE : "{" +RBRACE : "}" +LSQB : "[" +RSQB : "]" COMMA : "," DOT : "." +EQ : /[ \t]*=(?!=|>)/ COLON : ":" +DBLQUOTE : "\"" + +// Interpolation +INTERP_START : "${" + +// Splat Operators +ATTR_SPLAT : ".*" +FULL_SPLAT_START : "[*]" + +// Special Operators +FOR_OBJECT_ARROW : "=>" +ELLIPSIS : "..." +COLONS: "::" + +// Heredocs +HEREDOC_TEMPLATE : /<<(?P<heredoc>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P<heredoc_trim>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ + +// Ignore whitespace (but not newlines, as they're significant in HCL) +%ignore /[ \t]+/ + +// ============================================================================ +// Rules +// ============================================================================ + +// Top-level structure +start : body + +// Body and basic constructs +body : (new_line_or_comment? (attribute | block))* new_line_or_comment? +attribute : identifier EQ expression +block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE + +// Whitespace and comments +new_line_or_comment: ( NL_OR_COMMENT )+ + +// Basic literals and identifiers +identifier : NAME +keyword: IN | FOR | IF | FOR_EACH +int_lit: INT_LITERAL +float_lit: FLOAT_LITERAL +string: DBLQUOTE string_part* DBLQUOTE +string_part: STRING_CHARS + | ESCAPED_INTERPOLATION + | interpolation + +// Expressions +?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? 
expression -> conditional + | or_expr +interpolation: INTERP_START expression RBRACE + +// Operator precedence ladder (lowest to highest) +// Each level uses left recursion for left-associativity. +// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain +// transformer compatibility with BinaryOpRule / BinaryTermRule / BinaryOperatorRule. + +// Logical OR +?or_expr : or_expr or_binary_term new_line_or_comment? -> binary_op + | and_expr +or_binary_term : or_binary_operator new_line_or_comment? and_expr -> binary_term +!or_binary_operator : DOUBLE_PIPE -> binary_operator + +// Logical AND +?and_expr : and_expr and_binary_term new_line_or_comment? -> binary_op + | eq_expr +and_binary_term : and_binary_operator new_line_or_comment? eq_expr -> binary_term +!and_binary_operator : DOUBLE_AMP -> binary_operator + +// Equality +?eq_expr : eq_expr eq_binary_term new_line_or_comment? -> binary_op + | rel_expr +eq_binary_term : eq_binary_operator new_line_or_comment? rel_expr -> binary_term +!eq_binary_operator : DOUBLE_EQ -> binary_operator + | NEQ -> binary_operator + +// Relational +?rel_expr : rel_expr rel_binary_term new_line_or_comment? -> binary_op + | add_expr +rel_binary_term : rel_binary_operator new_line_or_comment? add_expr -> binary_term +!rel_binary_operator : LT -> binary_operator + | GT -> binary_operator + | LEQ -> binary_operator + | GEQ -> binary_operator +// Additive +?add_expr : add_expr add_binary_term new_line_or_comment? -> binary_op + | mul_expr +add_binary_term : add_binary_operator new_line_or_comment? mul_expr -> binary_term +!add_binary_operator : PLUS -> binary_operator + | MINUS -> binary_operator + +// Multiplicative +?mul_expr : mul_expr mul_binary_term new_line_or_comment? -> binary_op + | unary_expr +mul_binary_term : mul_binary_operator new_line_or_comment? 
unary_expr -> binary_term +!mul_binary_operator : ASTERISK -> binary_operator + | SLASH -> binary_operator + | PERCENT -> binary_operator + +// Unary (highest precedence for operations) +?unary_expr : unary_op | expr_term +!unary_op : (MINUS | NOT) expr_term + +// Expression terms expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | float_lit | int_lit | string | tuple | object - | function_call - | index_expr_term - | get_attr_expr_term | identifier - | provider_function_call + | function_call | heredoc_template | heredoc_template_trim + | index_expr_term + | get_attr_expr_term | attr_splat_expr_term | full_splat_expr_term | for_tuple_expr | for_object_expr -string: "\"" string_part* "\"" -string_part: STRING_CHARS - | ESCAPED_INTERPOLATION - | interpolation -interpolation: "${" expression "}" -ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ -STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ - -int_lit : NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+ -!float_lit: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? - | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) ("." DECIMAL+)? (EXP_MARK) -NEGATIVE_DECIMAL : "-" DECIMAL -DECIMAL : "0".."9" -EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ -EQ : /[ \t]*=(?!=|>)/ - -tuple : "[" (new_line_or_comment* expression new_line_or_comment* ",")* (new_line_or_comment* expression)? new_line_or_comment* "]" -object : "{" new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* "}" +// Collections +tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB +object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? 
COMMA)) new_line_or_comment?)* RBRACE object_elem : object_elem_key ( EQ | COLON ) expression object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression object_elem_key_expression : LPAR expression RPAR object_elem_key_dot_accessor : identifier (DOT identifier)+ -heredoc_template : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ -heredoc_template_trim : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ +// Heredocs +heredoc_template : HEREDOC_TEMPLATE +heredoc_template_trim : HEREDOC_TEMPLATE_TRIM -function_call : identifier "(" new_line_or_comment? arguments? new_line_or_comment? ")" -arguments : (expression (new_line_or_comment* "," new_line_or_comment* expression)* ("," | "...")? new_line_or_comment*) -colons: "::" -provider_function_call: identifier colons identifier colons identifier "(" new_line_or_comment? arguments? new_line_or_comment? ")" +// Functions +function_call : identifier (COLONS identifier COLONS identifier)? LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) +// Indexing and attribute access index_expr_term : expr_term index get_attr_expr_term : expr_term get_attr attr_splat_expr_term : expr_term attr_splat full_splat_expr_term : expr_term full_splat -index : "[" new_line_or_comment? expression new_line_or_comment? "]" | "." DECIMAL+ -get_attr : "." identifier -attr_splat : ".*" get_attr* -full_splat : "[*]" (get_attr | index)* +?index : braces_index | short_index +braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB +short_index : DOT INT_LITERAL +get_attr : DOT identifier +attr_splat : ATTR_SPLAT (get_attr | index)* +full_splat : FULL_SPLAT_START (get_attr | index)* -FOR_OBJECT_ARROW : "=>" -!for_tuple_expr : "[" new_line_or_comment? for_intro new_line_or_comment? 
expression new_line_or_comment? for_cond? new_line_or_comment? "]" -!for_object_expr : "{" new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? "..."? new_line_or_comment? for_cond? new_line_or_comment? "}" -!for_intro : "for" new_line_or_comment? identifier ("," identifier new_line_or_comment?)? new_line_or_comment? "in" new_line_or_comment? expression new_line_or_comment? ":" new_line_or_comment? -!for_cond : "if" new_line_or_comment? expression - -%ignore /[ \t]+/ +// For expressions +!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? RSQB +!for_object_expr : LBRACE new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? ELLIPSIS? new_line_or_comment? for_cond? new_line_or_comment? RBRACE +!for_intro : FOR new_line_or_comment? identifier (COMMA identifier new_line_or_comment?)? new_line_or_comment? IN new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? +!for_cond : IF new_line_or_comment? expression diff --git a/hcl2/parser.py b/hcl2/parser.py index 3e524736..a33fe5f8 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,31 +12,9 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "rule_transformer/hcl2.lark", + "hcl2.lark", parser="lalr", cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, propagate_positions=True, ) - - -@functools.lru_cache() -def reconstruction_parser() -> Lark: - """ - Build parser for transforming python structures into HCL2 text. - This is duplicated from `parser` because we need different options here for - the reconstructor. Please make sure changes are kept in sync between the two - if necessary. 
- """ - return Lark.open( - "rule_transformer/hcl2.lark", - parser="lalr", - # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed: - # - # https://github.com/lark-parser/lark/issues/1472 - # - # cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar - rel_to=__file__, - propagate_positions=True, - maybe_placeholders=False, # Needed for reconstruction - ) diff --git a/hcl2/rule_transformer/processor.py b/hcl2/processor.py similarity index 100% rename from hcl2/rule_transformer/processor.py rename to hcl2/processor.py diff --git a/hcl2/py.typed b/hcl2/py.typed deleted file mode 100644 index e69de29b..00000000 diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 555edcf6..e92f7040 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -1,739 +1,238 @@ -"""A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality""" - -import re -from typing import List, Dict, Callable, Optional, Union, Any, Tuple - -from lark import Lark, Tree -from lark.grammar import Terminal, Symbol -from lark.lexer import Token, PatternStr, TerminalDef -from lark.reconstruct import Reconstructor -from lark.tree_matcher import is_discarded_terminal -from lark.visitors import Transformer_InPlace -from regex import regex - -from hcl2.const import START_LINE_KEY, END_LINE_KEY -from hcl2.parser import reconstruction_parser - - -# function to remove the backslashes within interpolated portions -def reverse_quotes_within_interpolation(interp_s: str) -> str: - """ - A common operation is to `json.dumps(s)` where s is a string to output in - HCL. This is useful for automatically escaping any quotes within the - string, but this escapes quotes within interpolation incorrectly. This - method removes any erroneous escapes within interpolated segments of a - string. 
- """ - return re.sub(r"\$\{(.*)}", lambda m: m.group(0).replace('\\"', '"'), interp_s) - - -class WriteTokensAndMetaTransformer(Transformer_InPlace): - """ - Inserts discarded tokens into their correct place, according to the rules - of grammar, and annotates with metadata during reassembly. The metadata - tracked here include the terminal which generated a particular string - output, and the rule that that terminal was matched on. - - This is a modification of lark.reconstruct.WriteTokensTransformer - """ - - tokens: Dict[str, TerminalDef] - term_subs: Dict[str, Callable[[Symbol], str]] - - def __init__( - self, - tokens: Dict[str, TerminalDef], - term_subs: Dict[str, Callable[[Symbol], str]], - ) -> None: - super().__init__() - self.tokens = tokens - self.term_subs = term_subs - - def __default__(self, data, children, meta): - """ - This method is called for every token the transformer visits. - """ - - if not getattr(meta, "match_tree", False): - return Tree(data, children) - iter_args = iter( - [child[2] if isinstance(child, tuple) else child for child in children] - ) - to_write = [] - for sym in meta.orig_expansion: - if is_discarded_terminal(sym): - try: - value = self.term_subs[sym.name](sym) - except KeyError as exc: - token = self.tokens[sym.name] - if not isinstance(token.pattern, PatternStr): - raise NotImplementedError( - f"Reconstructing regexps not supported yet: {token}" - ) from exc - - value = token.pattern.value - - # annotate the leaf with the specific rule (data) and terminal - # (sym) it was generated from - to_write.append((data, sym, value)) - else: - item = next(iter_args) - if isinstance(item, list): - to_write += item - else: - if isinstance(item, Token): - # annotate the leaf with the specific rule (data) and - # terminal (sym) it was generated from - to_write.append((data, sym, item)) - else: - to_write.append(item) - - return to_write - - -class HCLReconstructor(Reconstructor): +from typing import List, Union + +from lark import Tree, 
Token +from hcl2.rules import tokens +from hcl2.rules.base import BlockRule +from hcl2.rules.for_expressions import ForIntroRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.strings import StringRule +from hcl2.rules.expressions import ( + ExprTermRule, + ConditionalRule, + UnaryOpRule, +) + + +class HCLReconstructor: """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" - def __init__( - self, - parser: Lark, - term_subs: Optional[Dict[str, Callable[[Symbol], str]]] = None, - ): - Reconstructor.__init__(self, parser, term_subs) - - self.write_tokens: WriteTokensAndMetaTransformer = ( - WriteTokensAndMetaTransformer( - {token.name: token for token in self.tokens}, term_subs or {} - ) - ) - - # these variables track state during reconstruction to enable us to make - # informed decisions about formatting output. They are primarily used - # by the _should_add_space(...) method. - self._last_char_space = True - self._last_terminal: Union[Terminal, None] = None - self._last_rule: Union[Tree, Token, None] = None - self._deferred_item = None - - def should_be_wrapped_in_spaces(self, terminal: Terminal) -> bool: - """Whether given terminal should be wrapped in spaces""" - return terminal.name in { - "IF", - "IN", - "FOR", - "FOR_EACH", - "FOR_OBJECT_ARROW", - "COLON", - "QMARK", - "BINARY_OP", - } - - def _is_equals_sign(self, terminal) -> bool: - return ( - isinstance(self._last_rule, Token) - and self._last_rule.value in ("attribute", "object_elem") - and self._last_terminal == Terminal("EQ") - and terminal != Terminal("NL_OR_COMMENT") - ) - - # pylint: disable=too-many-branches, too-many-return-statements - def _should_add_space(self, rule, current_terminal, is_block_label: bool = False): - """ - This method documents the situations in which we add space around - certain tokens while reconstructing the generated HCL. 
- - Additional rules can be added here if the generated HCL has - improper whitespace (affecting parse OR affecting ability to perfectly - reconstruct a file down to the whitespace level.) - - It has the following information available to make its decision: - - - the last token (terminal) we output - - the last rule that token belonged to - - the current token (terminal) we're about to output - - the rule the current token belongs to - - This should be sufficient to make a spacing decision. - """ - - # we don't need to add multiple spaces - if self._last_char_space: - return False + _binary_op_types = { + "DOUBLE_EQ", + "NEQ", + "LT", + "GT", + "LEQ", + "GEQ", + "MINUS", + "ASTERISK", + "SLASH", + "PERCENT", + "DOUBLE_AMP", + "DOUBLE_PIPE", + "PLUS", + } - # we don't add a space at the start of the file - if not self._last_terminal or not self._last_rule: + def __init__(self): + self._reset_state() + + def _reset_state(self): + """State tracking for formatting decisions""" + self._last_was_space = True + self._current_indent = 0 + self._last_token_name = None + self._last_rule_name = None + self._in_parentheses = False + self._in_object = False + self._in_tuple = False + + def _should_add_space_before( + self, current_node: Union[Tree, Token], parent_rule_name: str = None + ) -> bool: + """Determine if we should add a space before the current token/rule.""" + + # Don't add space if we already have one + if self._last_was_space: return False - if self._is_equals_sign(current_terminal): - return True + # Don't add space at the beginning + if self._last_token_name is None: + return False - if is_block_label: - pass - # print(rule, self._last_rule, current_terminal, self._last_terminal) + if isinstance(current_node, Token): + token_type = current_node.type - if is_block_label and isinstance(rule, Token) and rule.value == "string": + # Space before '{' in blocks if ( - current_terminal == self._last_terminal == Terminal("DBLQUOTE") - or current_terminal == 
Terminal("DBLQUOTE") - and self._last_terminal == Terminal("IDENTIFIER") + token_type == tokens.LBRACE.lark_name() + and parent_rule_name == BlockRule.lark_name() ): - # print("true") return True - # if we're in a ternary or binary operator, add space around the operator - if ( - isinstance(rule, Token) - and rule.value - in [ - "conditional", - "binary_operator", - ] - and self.should_be_wrapped_in_spaces(current_terminal) - ): - return True - - # if we just left a ternary or binary operator, add space around the - # operator unless there's a newline already - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value - in [ - "conditional", - "binary_operator", - ] - and self.should_be_wrapped_in_spaces(self._last_terminal) - and current_terminal != Terminal("NL_OR_COMMENT") - ): - return True - - # if we're in a for or if statement and find a keyword, add a space - if ( - isinstance(rule, Token) - and rule.value - in [ - "for_object_expr", - "for_cond", - "for_intro", - ] - and self.should_be_wrapped_in_spaces(current_terminal) - ): - return True - - # if we've just left a for or if statement and find a keyword, add a - # space, unless we have a newline - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value - in [ - "for_object_expr", - "for_cond", - "for_intro", - ] - and self.should_be_wrapped_in_spaces(self._last_terminal) - and current_terminal != Terminal("NL_OR_COMMENT") - ): - return True - - # if we're in a block - if (isinstance(rule, Token) and rule.value == "block") or ( - isinstance(rule, str) and re.match(r"^__block_(star|plus)_.*", rule) - ): - # always add space before the starting brace - if current_terminal == Terminal("LBRACE"): + # Space around Conditional Expression operators + if ( + parent_rule_name == ConditionalRule.lark_name() + and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + or self._last_token_name + in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + ): return True - # always 
add space before the closing brace - if current_terminal == Terminal( - "RBRACE" - ) and self._last_terminal != Terminal("LBRACE"): + # Space after + if ( + parent_rule_name == ForIntroRule.lark_name() + and token_type == tokens.COLON.lark_name() + ): + return True - # always add space between string literals - if current_terminal == Terminal("STRING_CHARS"): + # Space after commas in tuples and function arguments... + if self._last_token_name == tokens.COMMA.lark_name(): + # ... except for last comma + if token_type == tokens.RSQB.lark_name(): + return False return True - # if we just opened a block, add a space, unless the block is empty - # or has a newline - if ( - isinstance(self._last_rule, Token) - and self._last_rule.value == "block" - and self._last_terminal == Terminal("LBRACE") - and current_terminal not in [Terminal("RBRACE"), Terminal("NL_OR_COMMENT")] - ): - return True - - # if we're in a tuple or function arguments (this rule matches commas between items) - if isinstance(self._last_rule, str) and re.match( - r"^__(tuple|arguments)_(star|plus)_.*", self._last_rule - ): - - # string literals, decimals, and identifiers should always be - # preceded by a space if they're following a comma in a tuple or - # function arg - if current_terminal in [ - Terminal("DBLQUOTE"), - Terminal("DECIMAL"), - Terminal("NAME"), - Terminal("NEGATIVE_DECIMAL"), + if token_type in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + tokens.ELLIPSIS.lark_name(), ]: return True - # the catch-all case, we're not sure, so don't add a space - return False - - def _reconstruct(self, tree, is_block_label=False): - unreduced_tree = self.match_tree(tree, tree.data) - res = self.write_tokens.transform(unreduced_tree) - for item in res: - # any time we encounter a child tree, we recurse - if isinstance(item, Tree): - yield from self._reconstruct( - item, (unreduced_tree.data == "block" and item.data != "body") - ) - - # every leaf should be a tuple, which 
contains information about - # which terminal the leaf represents - elif isinstance(item, tuple): - rule, terminal, value = item - - # first, handle any deferred items - if self._deferred_item is not None: - ( - deferred_rule, - deferred_terminal, - deferred_value, - ) = self._deferred_item - - # if we deferred a comma and the next character ends a - # parenthesis or block, we can throw it out - if deferred_terminal == Terminal("COMMA") and terminal in [ - Terminal("RPAR"), - Terminal("RBRACE"), - ]: - pass - # in any other case, we print the deferred item - else: - yield deferred_value - - # and do our bookkeeping - self._last_terminal = deferred_terminal - self._last_rule = deferred_rule - if deferred_value and not deferred_value[-1].isspace(): - self._last_char_space = False - - # clear the deferred item - self._deferred_item = None - - # potentially add a space before the next token - if self._should_add_space(rule, terminal, is_block_label): - yield " " - self._last_char_space = True - - # potentially defer the item if needed - if terminal in [Terminal("COMMA")]: - self._deferred_item = item - else: - # otherwise print the next token - yield value - - # and do our bookkeeping so we can make an informed - # decision about formatting next time - self._last_terminal = terminal - self._last_rule = rule - if value: - self._last_char_space = value[-1].isspace() - - else: - raise RuntimeError(f"Unknown bare token type: {item}") - - def reconstruct(self, tree, postproc=None, insert_spaces=False): - """Convert a Lark.Tree AST back into a string representation of HCL.""" - return Reconstructor.reconstruct( - self, - tree, - postproc, - insert_spaces, - ) - - -class HCLReverseTransformer: - """ - The reverse of hcl2.transformer.DictTransformer. This method attempts to - convert a dict back into a working AST, which can be written back out. 
- """ - - @staticmethod - def _name_to_identifier(name: str) -> Tree: - """Converts a string to a NAME token within an identifier rule.""" - return Tree(Token("RULE", "identifier"), [Token("NAME", name)]) - - @staticmethod - def _escape_interpolated_str(interp_s: str) -> str: - if interp_s.strip().startswith("<<-") or interp_s.strip().startswith("<<"): - # For heredoc strings, preserve their format exactly - return reverse_quotes_within_interpolation(interp_s) - # Escape backslashes first (very important to do this first) - escaped = interp_s.replace("\\", "\\\\") - # Escape quotes - escaped = escaped.replace('"', '\\"') - # Escape control characters - escaped = escaped.replace("\n", "\\n") - escaped = escaped.replace("\r", "\\r") - escaped = escaped.replace("\t", "\\t") - escaped = escaped.replace("\b", "\\b") - escaped = escaped.replace("\f", "\\f") - # find each interpolation within the string and remove the backslashes - interp_s = reverse_quotes_within_interpolation(f"{escaped}") - return interp_s - - @staticmethod - def _block_has_label(block: dict) -> bool: - return len(block.keys()) == 1 - - def __init__(self): - pass - - def transform(self, hcl_dict: dict) -> Tree: - """Given a dict, return a Lark.Tree representing the HCL AST.""" - level = 0 - body = self._transform_dict_to_body(hcl_dict, level) - start = Tree(Token("RULE", "start"), [body]) - return start - - @staticmethod - def _is_string_wrapped_tf(interp_s: str) -> bool: - """ - Determines whether a string is a complex HCL data structure - wrapped in ${ interpolation } characters. 
- """ - if not interp_s.startswith("${") or not interp_s.endswith("}"): - return False + if ( + self._last_token_name + in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + ] + and token_type != "NL_OR_COMMENT" + ): + return True - nested_tokens = [] - for match in re.finditer(r"\$?\{|}", interp_s): - if match.group(0) in ["${", "{"]: - nested_tokens.append(match.group(0)) - elif match.group(0) == "}": - nested_tokens.pop() - - # if we exit ${ interpolation } before the end of the string, - # this interpolated string has string parts and can't represent - # a valid HCL expression on its own (without quotes) - if len(nested_tokens) == 0 and match.end() != len(interp_s): - return False + # Space around for_object arrow + if tokens.FOR_OBJECT_ARROW.lark_name() in [ + token_type, + self._last_token_name, + ]: + return True - return True - - @classmethod - def _unwrap_interpolation(cls, value: str) -> str: - if cls._is_string_wrapped_tf(value): - return value[2:-1] - return value - - def _newline(self, level: int, count: int = 1) -> Tree: - return Tree( - Token("RULE", "new_line_or_comment"), - [Token("NL_OR_COMMENT", f"\n{' ' * level}") for _ in range(count)], - ) - - def _build_string_rule(self, string: str, level: int = 0) -> Tree: - # grammar in hcl2.lark defines that a string is built of any number of string parts, - # each string part can be either interpolation expression, escaped interpolation string - # or regular string - # this method build hcl2 string rule based on arbitrary string, - # splitting such string into individual parts and building a lark tree out of them - # - result = [] + # Space after ellipsis in function arguments + if self._last_token_name == tokens.ELLIPSIS.lark_name(): + return True - pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") - parts = [part for part in pattern.split(string) if part != ""] - # e.g. 
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] - # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] - - for part in parts: - if part.startswith("$${") and part.endswith("}"): - result.append(Token("ESCAPED_INTERPOLATION", part)) - - # unwrap interpolation expression and recurse into it - elif part.startswith("${") and part.endswith("}"): - part = part[2:-1] - if part.startswith('"') and part.endswith('"'): - part = part[1:-1] - part = self._transform_value_to_expr_term(part, level) - else: - part = Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "identifier"), [Token("NAME", part)])], - ) - - result.append(Tree(Token("RULE", "interpolation"), [part])) - - else: - result.append(Token("STRING_CHARS", part)) - - result = [Tree(Token("RULE", "string_part"), [element]) for element in result] - return Tree(Token("RULE", "string"), result) - - def _is_block(self, value: Any) -> bool: - if isinstance(value, dict): - block_body = value - if START_LINE_KEY in block_body.keys() or END_LINE_KEY in block_body.keys(): + if tokens.EQ.lark_name() in [token_type, self._last_token_name]: return True - try: - # if block is labeled, actual body might be nested - # pylint: disable=W0612 - block_label, block_body = next(iter(value.items())) - except StopIteration: - # no more potential labels = nothing more to check + # Don't add space around operator tokens inside unary_op + if parent_rule_name == UnaryOpRule.lark_name(): return False - return self._is_block(block_body) + if ( + token_type in self._binary_op_types + or self._last_token_name in self._binary_op_types + ): + return True + + elif isinstance(current_node, Tree): + rule_name = current_node.data - if isinstance(value, list): - if len(value) > 0: - return self._is_block(value[0]) + if parent_rule_name == BlockRule.lark_name(): + # Add space between multiple string/identifier labels in blocks + if rule_name in [ + StringRule.lark_name(), + 
IdentifierRule.lark_name(), + ] and self._last_rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ]: + return True return False - def _calculate_block_labels(self, block: dict) -> Tuple[List[str], dict]: - # if block doesn't have a label - if len(block.keys()) != 1: - return [], block - - # otherwise, find the label - curr_label = list(block)[0] - potential_body = block[curr_label] - - # __start_line__ and __end_line__ metadata are not labels - if ( - START_LINE_KEY in potential_body.keys() - or END_LINE_KEY in potential_body.keys() - ): - return [curr_label], potential_body - - # recurse and append the label - next_label, block_body = self._calculate_block_labels(potential_body) - return [curr_label] + next_label, block_body - - # pylint:disable=R0914 - def _transform_dict_to_body(self, hcl_dict: dict, level: int) -> Tree: - # we add a newline at the top of a body within a block, not the root body - # >2 here is to ignore the __start_line__ and __end_line__ metadata - if level > 0 and len(hcl_dict) > 2: - children = [self._newline(level)] + def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: + """Recursively reconstruct a Tree node into HCL text fragments.""" + result = [] + rule_name = tree.data + + if rule_name == UnaryOpRule.lark_name(): + for i, child in enumerate(tree.children): + result.extend(self._reconstruct_node(child, rule_name)) + if i == 0: + # Suppress space between unary operator and its operand + self._last_was_space = True + + elif rule_name == ExprTermRule.lark_name(): + # Check if parenthesized + if ( + len(tree.children) >= 3 + and isinstance(tree.children[0], Token) + and tree.children[0].type == tokens.LPAR.lark_name() + and isinstance(tree.children[-1], Token) + and tree.children[-1].type == tokens.RPAR.lark_name() + ): + self._in_parentheses = True + + for child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + self._in_parentheses = False + + else: + for 
child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + if self._should_add_space_before(tree, parent_rule_name): + result.insert(0, " ") + + # Update state tracking + self._last_rule_name = rule_name + if result: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: + """Reconstruct a Token node into HCL text fragments.""" + result = str(token.value) + if self._should_add_space_before(token, parent_rule_name): + result = " " + result + + self._last_token_name = token.type + if len(token) != 0: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_node( + self, node: Union[Tree, Token], parent_rule_name: str = None + ) -> List[str]: + """Reconstruct any node (Tree or Token) into HCL text fragments.""" + if isinstance(node, Tree): + return self._reconstruct_tree(node, parent_rule_name) + elif isinstance(node, Token): + return [self._reconstruct_token(node, parent_rule_name)] else: - children = [] - - # iterate through each attribute or sub-block of this block - for key, value in hcl_dict.items(): - if key in [START_LINE_KEY, END_LINE_KEY]: - continue - - # construct the identifier, whether that be a block type name or an attribute key - identifier_name = self._name_to_identifier(key) - - # first, check whether the value is a "block" - if self._is_block(value): - for block_v in value: - block_labels, block_body_dict = self._calculate_block_labels( - block_v - ) - block_label_trees = [ - self._build_string_rule(block_label, level) - for block_label in block_labels - ] - block_body = self._transform_dict_to_body( - block_body_dict, level + 1 - ) - - # create our actual block to add to our own body - block = Tree( - Token("RULE", "block"), - [identifier_name] + block_label_trees + [block_body], - ) - children.append(block) - # add empty line after 
block - new_line = self._newline(level - 1) - # add empty line with indentation for next element in the block - new_line.children.append(self._newline(level).children[0]) - - children.append(new_line) - - # if the value isn't a block, it's an attribute - else: - expr_term = self._transform_value_to_expr_term(value, level) - attribute = Tree( - Token("RULE", "attribute"), - [identifier_name, Token("EQ", " ="), expr_term], - ) - children.append(attribute) - children.append(self._newline(level)) - - # since we're leaving a block body here, reduce the indentation of the - # final newline if it exists - if ( - len(children) > 0 - and isinstance(children[-1], Tree) - and children[-1].data.type == "RULE" - and children[-1].data.value == "new_line_or_comment" - ): - children[-1] = self._newline(level - 1) - - return Tree(Token("RULE", "body"), children) - - # pylint: disable=too-many-branches, too-many-return-statements too-many-statements - def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]: - """Transforms a value from a dictionary into an "expr_term" (a value in HCL2) - - Anything passed to this function is treated "naively". Any lists passed - are assumed to be tuples, and any dicts passed are assumed to be objects. - No more checks will be performed for either to see if they are "blocks" - as this check happens in `_transform_dict_to_body`. 
- """ - - # for lists, recursively turn the child elements into expr_terms and - # store within a tuple - if isinstance(value, list): - tuple_tree = Tree( - Token("RULE", "tuple"), - [ - self._transform_value_to_expr_term(tuple_v, level) - for tuple_v in value - ], - ) - return Tree(Token("RULE", "expr_term"), [tuple_tree]) - - if value is None: - return Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "identifier"), [Token("NAME", "null")])], - ) - - # for dicts, recursively turn the child k/v pairs into object elements - # and store within an object - if isinstance(value, dict): - elements = [] - - # if the object has elements, put it on a newline - if len(value) > 0: - elements.append(self._newline(level + 1)) - - # iterate through the items and add them to the object - for i, (k, dict_v) in enumerate(value.items()): - if k in [START_LINE_KEY, END_LINE_KEY]: - continue - - value_expr_term = self._transform_value_to_expr_term(dict_v, level + 1) - k = self._unwrap_interpolation(k) - elements.append( - Tree( - Token("RULE", "object_elem"), - [ - Tree( - Token("RULE", "object_elem_key"), - [Tree(Token("RULE", "identifier"), [Token("NAME", k)])], - ), - Token("EQ", " ="), - value_expr_term, - ], - ) - ) - - # add indentation appropriately - if i < len(value) - 1: - elements.append(self._newline(level + 1)) - else: - elements.append(self._newline(level)) - return Tree( - Token("RULE", "expr_term"), [Tree(Token("RULE", "object"), elements)] - ) - - # treat booleans appropriately - if isinstance(value, bool): - return Tree( - Token("RULE", "expr_term"), - [ - Tree( - Token("RULE", "identifier"), - [Token("NAME", "true" if value else "false")], - ) - ], - ) - - # store integers as literals, digit by digit - if isinstance(value, int): - return Tree( - Token("RULE", "expr_term"), - [ - Tree( - Token("RULE", "int_lit"), - [Token("DECIMAL", digit) for digit in str(value)], - ) - ], - ) - - if isinstance(value, float): - value = str(value) - literal = [] - - if 
value[0] == "-": - # pop two first chars - minus and a digit - literal.append(Token("NEGATIVE_DECIMAL", value[:2])) - value = value[2:] - - while value != "": - char = value[0] - - if char == ".": - # current char marks beginning of decimal part: pop all remaining chars and end the loop - literal.append(Token("DOT", char)) - literal.extend(Token("DECIMAL", char) for char in value[1:]) - break - - if char == "e": - # current char marks beginning of e-notation: pop all remaining chars and end the loop - literal.append(Token("EXP_MARK", value)) - break - - literal.append(Token("DECIMAL", char)) - value = value[1:] - - return Tree( - Token("RULE", "expr_term"), - [Tree(Token("RULE", "float_lit"), literal)], - ) - - # store strings as single literals - if isinstance(value, str): - # potentially unpack a complex syntax structure - if self._is_string_wrapped_tf(value): - # we have to unpack it by parsing it - wrapped_value = re.match(r"\$\{(.*)}", value).group(1) # type:ignore - ast = reconstruction_parser().parse(f"value = {wrapped_value}") - - if ast.data != Token("RULE", "start"): - raise RuntimeError("Token must be `start` RULE") - - body = ast.children[0] - if body.data != Token("RULE", "body"): - raise RuntimeError("Token must be `body` RULE") - - attribute = body.children[0] - if attribute.data != Token("RULE", "attribute"): - raise RuntimeError("Token must be `attribute` RULE") - - if attribute.children[1] != Token("EQ", " ="): - raise RuntimeError("Token must be `EQ (=)` rule") - - parsed_value = attribute.children[2] - return parsed_value - - # otherwise it's a string - return Tree( - Token("RULE", "expr_term"), - [self._build_string_rule(self._escape_interpolated_str(value), level)], - ) - - # otherwise, we don't know the type - raise RuntimeError(f"Unknown type to transform {type(value)}") + # Fallback: convert to string + return [str(node)] + + def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: + """Convert a Lark.Tree AST back into 
a string representation of HCL.""" + # Reset state + self._reset_state() + + # Reconstruct the tree + fragments = self._reconstruct_node(tree) + + # Join fragments and apply post-processing + result = "".join(fragments) + + if postproc: + result = postproc(result) + + # Ensure file ends with newline + if result and not result.endswith("\n"): + result += "\n" + + return result diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark deleted file mode 100644 index 63154efb..00000000 --- a/hcl2/rule_transformer/hcl2.lark +++ /dev/null @@ -1,207 +0,0 @@ -// ============================================================================ -// Terminals -// ============================================================================ - -// Whitespace and Comments -NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ - -// Keywords -IF : "if" -IN : "in" -FOR : "for" -FOR_EACH : "for_each" - - -// Literals -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ -ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ -STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ -DECIMAL : "0".."9" -NEGATIVE_DECIMAL : "-" DECIMAL -EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ -INT_LITERAL: NEGATIVE_DECIMAL? DECIMAL+ -FLOAT_LITERAL: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? - | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) - -// Operators -DOUBLE_EQ : "==" -NEQ : "!=" -LT : "<" -GT : ">" -LEQ : "<=" -GEQ : ">=" -MINUS : "-" -ASTERISK : "*" -SLASH : "/" -PERCENT : "%" -DOUBLE_AMP : "&&" -DOUBLE_PIPE : "||" -PLUS : "+" -NOT : "!" -QMARK : "?" - -// Punctuation -LPAR : "(" -RPAR : ")" -LBRACE : "{" -RBRACE : "}" -LSQB : "[" -RSQB : "]" -COMMA : "," -DOT : "." -EQ : /[ \t]*=(?!=|>)/ -COLON : ":" -DBLQUOTE : "\"" - -// Interpolation -INTERP_START : "${" - -// Splat Operators -ATTR_SPLAT : ".*" -FULL_SPLAT_START : "[*]" - -// Special Operators -FOR_OBJECT_ARROW : "=>" -ELLIPSIS : "..." 
-COLONS: "::" - -// Heredocs -HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ -HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ - -// Ignore whitespace (but not newlines, as they're significant in HCL) -%ignore /[ \t]+/ - -// ============================================================================ -// Rules -// ============================================================================ - -// Top-level structure -start : body - -// Body and basic constructs -body : (new_line_or_comment? (attribute | block))* new_line_or_comment? -attribute : identifier EQ expression -block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE - -// Whitespace and comments -new_line_or_comment: ( NL_OR_COMMENT )+ - -// Basic literals and identifiers -identifier : NAME -keyword: IN | FOR | IF | FOR_EACH -int_lit: INT_LITERAL -float_lit: FLOAT_LITERAL -string: DBLQUOTE string_part* DBLQUOTE -string_part: STRING_CHARS - | ESCAPED_INTERPOLATION - | interpolation - -// Expressions -?expression : or_expr QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression -> conditional - | or_expr -interpolation: INTERP_START expression RBRACE - -// Operator precedence ladder (lowest to highest) -// Each level uses left recursion for left-associativity. -// Rule aliases (-> binary_op, -> binary_term, -> binary_operator) maintain -// transformer compatibility with BinaryOpRule / BinaryTermRule / BinaryOperatorRule. - -// Logical OR -?or_expr : or_expr or_binary_term new_line_or_comment? -> binary_op - | and_expr -or_binary_term : or_binary_operator new_line_or_comment? and_expr -> binary_term -!or_binary_operator : DOUBLE_PIPE -> binary_operator - -// Logical AND -?and_expr : and_expr and_binary_term new_line_or_comment? -> binary_op - | eq_expr -and_binary_term : and_binary_operator new_line_or_comment? 
eq_expr -> binary_term -!and_binary_operator : DOUBLE_AMP -> binary_operator - -// Equality -?eq_expr : eq_expr eq_binary_term new_line_or_comment? -> binary_op - | rel_expr -eq_binary_term : eq_binary_operator new_line_or_comment? rel_expr -> binary_term -!eq_binary_operator : DOUBLE_EQ -> binary_operator - | NEQ -> binary_operator - -// Relational -?rel_expr : rel_expr rel_binary_term new_line_or_comment? -> binary_op - | add_expr -rel_binary_term : rel_binary_operator new_line_or_comment? add_expr -> binary_term -!rel_binary_operator : LT -> binary_operator - | GT -> binary_operator - | LEQ -> binary_operator - | GEQ -> binary_operator - -// Additive -?add_expr : add_expr add_binary_term new_line_or_comment? -> binary_op - | mul_expr -add_binary_term : add_binary_operator new_line_or_comment? mul_expr -> binary_term -!add_binary_operator : PLUS -> binary_operator - | MINUS -> binary_operator - -// Multiplicative -?mul_expr : mul_expr mul_binary_term new_line_or_comment? -> binary_op - | unary_expr -mul_binary_term : mul_binary_operator new_line_or_comment? unary_expr -> binary_term -!mul_binary_operator : ASTERISK -> binary_operator - | SLASH -> binary_operator - | PERCENT -> binary_operator - -// Unary (highest precedence for operations) -?unary_expr : unary_op | expr_term -!unary_op : (MINUS | NOT) expr_term - -// Expression terms -expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR - | float_lit - | int_lit - | string - | tuple - | object - | identifier - | function_call - | heredoc_template - | heredoc_template_trim - | index_expr_term - | get_attr_expr_term - | attr_splat_expr_term - | full_splat_expr_term - | for_tuple_expr - | for_object_expr - -// Collections -tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB -object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? 
COMMA)) new_line_or_comment?)* RBRACE -object_elem : object_elem_key ( EQ | COLON ) expression -object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression -object_elem_key_expression : LPAR expression RPAR -object_elem_key_dot_accessor : identifier (DOT identifier)+ - -// Heredocs -heredoc_template : HEREDOC_TEMPLATE -heredoc_template_trim : HEREDOC_TEMPLATE_TRIM - -// Functions -function_call : identifier (COLONS identifier COLONS identifier)? LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR -arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) - -// Indexing and attribute access -index_expr_term : expr_term index -get_attr_expr_term : expr_term get_attr -attr_splat_expr_term : expr_term attr_splat -full_splat_expr_term : expr_term full_splat -?index : braces_index | short_index -braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB -short_index : DOT INT_LITERAL -get_attr : DOT identifier -attr_splat : ATTR_SPLAT (get_attr | index)* -full_splat : FULL_SPLAT_START (get_attr | index)* - -// For expressions -!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? RSQB -!for_object_expr : LBRACE new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? ELLIPSIS? new_line_or_comment? for_cond? new_line_or_comment? RBRACE -!for_intro : FOR new_line_or_comment? identifier (COMMA identifier new_line_or_comment?)? new_line_or_comment? IN new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? -!for_cond : IF new_line_or_comment? 
expression diff --git a/hcl2/rule_transformer/json.py b/hcl2/rule_transformer/json.py deleted file mode 100644 index 647b6683..00000000 --- a/hcl2/rule_transformer/json.py +++ /dev/null @@ -1,12 +0,0 @@ -from json import JSONEncoder -from typing import Any - -from hcl2.rule_transformer.rules.abstract import LarkRule - - -class LarkEncoder(JSONEncoder): - def default(self, obj: Any): - if isinstance(obj, LarkRule): - return obj.serialize() - else: - return super().default(obj) diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py deleted file mode 100644 index 099beead..00000000 --- a/hcl2/rule_transformer/reconstructor.py +++ /dev/null @@ -1,238 +0,0 @@ -from typing import List, Union - -from lark import Tree, Token -from hcl2.rule_transformer.rules import tokens -from hcl2.rule_transformer.rules.base import BlockRule -from hcl2.rule_transformer.rules.for_expressions import ForIntroRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.expressions import ( - ExprTermRule, - ConditionalRule, - UnaryOpRule, -) - - -class HCLReconstructor: - """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" - - _binary_op_types = { - "DOUBLE_EQ", - "NEQ", - "LT", - "GT", - "LEQ", - "GEQ", - "MINUS", - "ASTERISK", - "SLASH", - "PERCENT", - "DOUBLE_AMP", - "DOUBLE_PIPE", - "PLUS", - } - - def __init__(self): - self._reset_state() - - def _reset_state(self): - """State tracking for formatting decisions""" - self._last_was_space = True - self._current_indent = 0 - self._last_token_name = None - self._last_rule_name = None - self._in_parentheses = False - self._in_object = False - self._in_tuple = False - - def _should_add_space_before( - self, current_node: Union[Tree, Token], parent_rule_name: str = None - ) -> bool: - """Determine if we should add a space before the current token/rule.""" 
- - # Don't add space if we already have one - if self._last_was_space: - return False - - # Don't add space at the beginning - if self._last_token_name is None: - return False - - if isinstance(current_node, Token): - token_type = current_node.type - - # Space before '{' in blocks - if ( - token_type == tokens.LBRACE.lark_name() - and parent_rule_name == BlockRule.lark_name() - ): - return True - - # Space around Conditional Expression operators - if ( - parent_rule_name == ConditionalRule.lark_name() - and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] - or self._last_token_name - in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] - ): - return True - - # Space after - if ( - parent_rule_name == ForIntroRule.lark_name() - and token_type == tokens.COLON.lark_name() - ): - - return True - - # Space after commas in tuples and function arguments... - if self._last_token_name == tokens.COMMA.lark_name(): - # ... except for last comma - if token_type == tokens.RSQB.lark_name(): - return False - return True - - if token_type in [ - tokens.FOR.lark_name(), - tokens.IN.lark_name(), - tokens.IF.lark_name(), - tokens.ELLIPSIS.lark_name(), - ]: - return True - - if ( - self._last_token_name - in [ - tokens.FOR.lark_name(), - tokens.IN.lark_name(), - tokens.IF.lark_name(), - ] - and token_type != "NL_OR_COMMENT" - ): - return True - - # Space around for_object arrow - if tokens.FOR_OBJECT_ARROW.lark_name() in [ - token_type, - self._last_token_name, - ]: - return True - - # Space after ellipsis in function arguments - if self._last_token_name == tokens.ELLIPSIS.lark_name(): - return True - - if tokens.EQ.lark_name() in [token_type, self._last_token_name]: - return True - - # Don't add space around operator tokens inside unary_op - if parent_rule_name == UnaryOpRule.lark_name(): - return False - - if ( - token_type in self._binary_op_types - or self._last_token_name in self._binary_op_types - ): - return True - - elif isinstance(current_node, Tree): - 
rule_name = current_node.data - - if parent_rule_name == BlockRule.lark_name(): - # Add space between multiple string/identifier labels in blocks - if rule_name in [ - StringRule.lark_name(), - IdentifierRule.lark_name(), - ] and self._last_rule_name in [ - StringRule.lark_name(), - IdentifierRule.lark_name(), - ]: - return True - - return False - - def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: - """Recursively reconstruct a Tree node into HCL text fragments.""" - result = [] - rule_name = tree.data - - if rule_name == UnaryOpRule.lark_name(): - for i, child in enumerate(tree.children): - result.extend(self._reconstruct_node(child, rule_name)) - if i == 0: - # Suppress space between unary operator and its operand - self._last_was_space = True - - elif rule_name == ExprTermRule.lark_name(): - # Check if parenthesized - if ( - len(tree.children) >= 3 - and isinstance(tree.children[0], Token) - and tree.children[0].type == tokens.LPAR.lark_name() - and isinstance(tree.children[-1], Token) - and tree.children[-1].type == tokens.RPAR.lark_name() - ): - self._in_parentheses = True - - for child in tree.children: - result.extend(self._reconstruct_node(child, rule_name)) - - self._in_parentheses = False - - else: - for child in tree.children: - result.extend(self._reconstruct_node(child, rule_name)) - - if self._should_add_space_before(tree, parent_rule_name): - result.insert(0, " ") - - # Update state tracking - self._last_rule_name = rule_name - if result: - self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") - - return result - - def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: - """Reconstruct a Token node into HCL text fragments.""" - result = str(token.value) - if self._should_add_space_before(token, parent_rule_name): - result = " " + result - - self._last_token_name = token.type - if len(token) != 0: - self._last_was_space = result[-1].endswith(" ") or 
result[-1].endswith("\n") - - return result - - def _reconstruct_node( - self, node: Union[Tree, Token], parent_rule_name: str = None - ) -> List[str]: - """Reconstruct any node (Tree or Token) into HCL text fragments.""" - if isinstance(node, Tree): - return self._reconstruct_tree(node, parent_rule_name) - elif isinstance(node, Token): - return [self._reconstruct_token(node, parent_rule_name)] - else: - # Fallback: convert to string - return [str(node)] - - def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: - """Convert a Lark.Tree AST back into a string representation of HCL.""" - # Reset state - self._reset_state() - - # Reconstruct the tree - fragments = self._reconstruct_node(tree) - - # Join fragments and apply post-processing - result = "".join(fragments) - - if postproc: - result = postproc(result) - - # Ensure file ends with newline - if result and not result.endswith("\n"): - result += "\n" - - return result diff --git a/hcl2/rule_transformer/rules/__init__.py b/hcl2/rule_transformer/rules/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/hcl2/rule_transformer/__init__.py b/hcl2/rules/__init__.py similarity index 100% rename from hcl2/rule_transformer/__init__.py rename to hcl2/rules/__init__.py diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rules/abstract.py similarity index 97% rename from hcl2/rule_transformer/rules/abstract.py rename to hcl2/rules/abstract.py index e83fed2b..a494d901 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rules/abstract.py @@ -5,7 +5,7 @@ from lark.exceptions import VisitError from lark.tree import Meta -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.utils import SerializationOptions, SerializationContext class LarkElement(ABC): diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rules/base.py similarity index 88% rename from hcl2/rule_transformer/rules/base.py rename to hcl2/rules/base.py index 
c879b772..a025949a 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rules/base.py @@ -4,14 +4,14 @@ from lark.tree import Meta from hcl2.const import IS_BLOCK -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.tokens import NAME, EQ, LBRACE, RBRACE - -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.rules.abstract import LarkRule, LarkToken +from hcl2.rules.expressions import ExpressionRule, ExprTermRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.strings import StringRule +from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE + +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.utils import SerializationOptions, SerializationContext class AttributeRule(LarkRule): diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rules/containers.py similarity index 93% rename from hcl2/rule_transformer/rules/containers.py rename to hcl2/rules/containers.py index a2f53436..4d7310c8 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rules/containers.py @@ -1,14 +1,14 @@ from typing import Tuple, List, Optional, Union, Any -from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import ( +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import ( FloatLitRule, IntLitRule, IdentifierRule, ) -from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.strings import StringRule 
+from hcl2.rules.tokens import ( COLON, EQ, LBRACE, @@ -20,11 +20,11 @@ RPAR, DOT, ) -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/expressions.py b/hcl2/rules/expressions.py similarity index 95% rename from hcl2/rule_transformer/rules/expressions.py rename to hcl2/rules/expressions.py index db256e82..1e1d0cd8 100644 --- a/hcl2/rule_transformer/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -4,16 +4,16 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import ( +from hcl2.rules.abstract import ( LarkToken, ) -from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule -from hcl2.rule_transformer.rules.tokens import LPAR, RPAR, QMARK, COLON -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.literal_rules import BinaryOperatorRule +from hcl2.rules.tokens import LPAR, RPAR, QMARK, COLON +from hcl2.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( wrap_into_parentheses, to_dollar_string, SerializationOptions, diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rules/for_expressions.py similarity index 95% rename from hcl2/rule_transformer/rules/for_expressions.py rename to hcl2/rules/for_expressions.py index 3a89aba3..a1f24dcb 100644 --- a/hcl2/rule_transformer/rules/for_expressions.py +++ b/hcl2/rules/for_expressions.py @@ -2,10 +2,10 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.abstract 
import LarkRule, LarkElement +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import ( LSQB, RSQB, LBRACE, @@ -18,11 +18,11 @@ ELLIPSIS, FOR_OBJECT_ARROW, ) -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rules/functions.py similarity index 90% rename from hcl2/rule_transformer/rules/functions.py rename to hcl2/rules/functions.py index 92cc8b11..380b959b 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rules/functions.py @@ -1,14 +1,14 @@ from functools import lru_cache from typing import Any, Optional, Tuple, Union, List -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR +from hcl2.rules.whitespace import ( InlineCommentMixIn, NewLineOrCommentRule, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rules/indexing.py similarity index 94% rename from hcl2/rule_transformer/rules/indexing.py rename to hcl2/rules/indexing.py index 20decf00..fc8cbf90 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rules/indexing.py @@ -2,21 +2,21 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import LarkRule -from 
hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule -from hcl2.rule_transformer.rules.literal_rules import IdentifierRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import ( DOT, IntLiteral, LSQB, RSQB, ATTR_SPLAT, ) -from hcl2.rule_transformer.rules.whitespace import ( +from hcl2.rules.whitespace import ( InlineCommentMixIn, NewLineOrCommentRule, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, to_dollar_string, SerializationContext, diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rules/literal_rules.py similarity index 85% rename from hcl2/rule_transformer/rules/literal_rules.py rename to hcl2/rules/literal_rules.py index baf8546f..2e5b8281 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rules/literal_rules.py @@ -1,8 +1,8 @@ from abc import ABC from typing import Any, Tuple -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.rules.abstract import LarkRule, LarkToken +from hcl2.utils import SerializationOptions, SerializationContext class TokenRule(LarkRule, ABC): diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rules/strings.py similarity index 94% rename from hcl2/rule_transformer/rules/strings.py rename to hcl2/rules/strings.py index 4e28e976..248ab173 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rules/strings.py @@ -1,9 +1,9 @@ import sys from typing import Tuple, List, Any, Union -from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expressions import ExpressionRule -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions 
import ExpressionRule +from hcl2.rules.tokens import ( INTERP_START, RBRACE, DBLQUOTE, @@ -12,7 +12,7 @@ HEREDOC_TEMPLATE, HEREDOC_TRIM_TEMPLATE, ) -from hcl2.rule_transformer.utils import ( +from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rules/tokens.py similarity index 98% rename from hcl2/rule_transformer/rules/tokens.py rename to hcl2/rules/tokens.py index ba948d3e..b02be66e 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rules/tokens.py @@ -1,7 +1,7 @@ from functools import lru_cache from typing import Callable, Any, Type, Optional, Tuple -from hcl2.rule_transformer.rules.abstract import LarkToken +from hcl2.rules.abstract import LarkToken class StringToken(LarkToken): diff --git a/hcl2/rule_transformer/rules/tree.py b/hcl2/rules/tree.py similarity index 100% rename from hcl2/rule_transformer/rules/tree.py rename to hcl2/rules/tree.py diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rules/whitespace.py similarity index 90% rename from hcl2/rule_transformer/rules/whitespace.py rename to hcl2/rules/whitespace.py index 62069b78..5f2fa886 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -1,9 +1,9 @@ from abc import ABC from typing import Optional, List, Any, Tuple -from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule -from hcl2.rule_transformer.rules.literal_rules import TokenRule -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext +from hcl2.rules.abstract import LarkToken, LarkRule +from hcl2.rules.literal_rules import TokenRule +from hcl2.utils import SerializationOptions, SerializationContext class NewLineOrCommentRule(TokenRule): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/transformer.py similarity index 93% rename from hcl2/rule_transformer/transformer.py rename to hcl2/transformer.py index 931eab8e..07230fe5 100644 --- 
a/hcl2/rule_transformer/transformer.py +++ b/hcl2/transformer.py @@ -2,13 +2,13 @@ from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta -from hcl2.rule_transformer.rules.base import ( +from hcl2.rules.base import ( StartRule, BodyRule, BlockRule, AttributeRule, ) -from hcl2.rule_transformer.rules.containers import ( +from hcl2.rules.containers import ( ObjectRule, ObjectElemRule, ObjectElemKeyRule, @@ -16,21 +16,21 @@ ObjectElemKeyExpressionRule, ObjectElemKeyDotAccessor, ) -from hcl2.rule_transformer.rules.expressions import ( +from hcl2.rules.expressions import ( BinaryTermRule, UnaryOpRule, BinaryOpRule, ExprTermRule, ConditionalRule, ) -from hcl2.rule_transformer.rules.for_expressions import ( +from hcl2.rules.for_expressions import ( ForTupleExprRule, ForObjectExprRule, ForIntroRule, ForCondRule, ) -from hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule -from hcl2.rule_transformer.rules.indexing import ( +from hcl2.rules.functions import ArgumentsRule, FunctionCallRule +from hcl2.rules.indexing import ( IndexExprTermRule, SqbIndexRule, ShortIndexRule, @@ -41,28 +41,28 @@ FullSplatRule, FullSplatExprTermRule, ) -from hcl2.rule_transformer.rules.literal_rules import ( +from hcl2.rules.literal_rules import ( FloatLitRule, IntLitRule, IdentifierRule, BinaryOperatorRule, KeywordRule, ) -from hcl2.rule_transformer.rules.strings import ( +from hcl2.rules.strings import ( InterpolationRule, StringRule, StringPartRule, HeredocTemplateRule, HeredocTrimTemplateRule, ) -from hcl2.rule_transformer.rules.tokens import ( +from hcl2.rules.tokens import ( NAME, IntLiteral, FloatLiteral, StringToken, StaticStringToken, ) -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rules.whitespace import NewLineOrCommentRule class RuleTransformer(Transformer): diff --git a/hcl2/rule_transformer/utils.py b/hcl2/utils.py similarity index 100% rename from hcl2/rule_transformer/utils.py rename to 
hcl2/utils.py From ba80334cd0ab6c567f425cd3813e5ed98132880c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 16:51:26 +0100 Subject: [PATCH 14/42] minor improvements to deserializer.py and formatter.py --- hcl2/deserializer.py | 43 +++++++++++++----------------- hcl2/formatter.py | 63 ++++++++++++++------------------------------ 2 files changed, 39 insertions(+), 67 deletions(-) diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 2290809c..d6b4d4c2 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -1,8 +1,8 @@ import json from abc import ABC, abstractmethod from dataclasses import dataclass -from functools import lru_cache -from typing import Any, TextIO, List, Union, Optional +from functools import cached_property +from typing import Any, TextIO, List, Union from regex import regex @@ -55,7 +55,6 @@ HEREDOC_TEMPLATE, COLON, ) -from hcl2.rules.whitespace import NewLineOrCommentRule from hcl2.transformer import RuleTransformer from hcl2.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN @@ -83,11 +82,8 @@ def load(self, file: TextIO) -> LarkElement: class BaseDeserializer(LarkElementTreeDeserializer): def __init__(self, options=None): super().__init__(options) - self._current_line = 1 - self._last_new_line: Optional[NewLineOrCommentRule] = None - @property - @lru_cache + @cached_property def _transformer(self) -> RuleTransformer: return RuleTransformer() @@ -119,27 +115,29 @@ def _deserialize(self, value: Any) -> LarkElement: def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: children = [] - for key, value in value.items(): - if self._is_block(value): + for key, val in value.items(): + if self._is_block(val): # this value is a list of blocks, iterate over each block and deserialize them - for block in value: + for block in val: children.append(self._deserialize_block(key, block)) else: # otherwise it's just an attribute if key != IS_BLOCK: - children.append(self._deserialize_attribute(key, value)) + 
children.append(self._deserialize_attribute(key, val)) return children def _deserialize_text(self, value: Any) -> LarkRule: - try: - int_val = int(value) - if "." in str(value): - return FloatLitRule([FloatLiteral(float(value))]) - return IntLitRule([IntLiteral(int_val)]) - except ValueError: - pass + # bool must be checked before int since bool is a subclass of int + if isinstance(value, bool): + return self._deserialize_identifier(str(value).lower()) + + if isinstance(value, float): + return FloatLitRule([FloatLiteral(value)]) + + if isinstance(value, int): + return IntLitRule([IntLiteral(value)]) if isinstance(value, str): if value.startswith('"') and value.endswith('"'): @@ -160,9 +158,6 @@ def _deserialize_text(self, value: Any) -> LarkRule: return self._deserialize_identifier(value) - elif isinstance(value, bool): - return self._deserialize_identifier(str(value).lower()) - return self._deserialize_identifier(str(value)) def _deserialize_identifier(self, value: str) -> IdentifierRule: @@ -283,8 +278,8 @@ def _deserialize_list(self, value: List) -> TupleRule: def _deserialize_object(self, value: dict) -> ObjectRule: children = [] - for key, value in value.items(): - children.append(self._deserialize_object_elem(key, value)) + for key, val in value.items(): + children.append(self._deserialize_object_elem(key, val)) if self.options.object_elements_trailing_comma: children.append(COMMA()) @@ -342,6 +337,6 @@ def _contains_block_marker(self, obj: dict) -> bool: return True if isinstance(value, list): for element in value: - if self._contains_block_marker(element): + if isinstance(element, dict) and self._contains_block_marker(element): return True return False diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 205d2ddd..35fb6b05 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -42,8 +42,7 @@ def format_tree(self, tree: LarkElement): class BaseFormatter(LarkElementTreeFormatter): def __init__(self, options: FormatterOptions = None): 
super().__init__(options) - self._current_line = 1 - self._current_indent_level = 0 + self._last_new_line: NewLineOrCommentRule = None def format_tree(self, tree: LarkElement): if isinstance(tree, StartRule): @@ -51,9 +50,6 @@ def format_tree(self, tree: LarkElement): def format_start_rule(self, rule: StartRule): self.format_body_rule(rule.body, 0) - # for child in rule.body.children: - # if isinstance(child, BlockRule): - # self.format_block_rule(child, 1) def format_block_rule(self, rule: BlockRule, indent_level: int = 0): if self.options.vertically_align_attributes: @@ -87,7 +83,8 @@ def format_body_rule(self, rule: BodyRule, indent_level: int = 0): new_children.insert(-2, self._build_newline(indent_level)) new_children.append(self._build_newline(indent_level, 2)) - new_children.pop(-1) + if new_children: + new_children.pop(-1) rule._children = new_children def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): @@ -158,7 +155,7 @@ def format_expression(self, rule: ExprTermRule, indent_level: int = 0): self.format_forobjectexpr(rule.expression, indent_level) elif isinstance(rule.expression, ExprTermRule): - self.format_expression(rule.expression) + self.format_expression(rule.expression, indent_level) def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = 0): for child in expression.children: @@ -169,7 +166,6 @@ def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = for index in indexes: expression.children[index] = self._build_newline(indent_level) self._deindent_last_line() - # expression.children[8] = self._build_newline(indent_level - 1) def format_forobjectexpr( self, expression: ForObjectExprRule, indent_level: int = 0 @@ -192,23 +188,28 @@ def _vertically_align_attributes_in_body(self, body: BodyRule): attributes_sequence.append(child) elif attributes_sequence: - max_length = max( - len(attribute.identifier.token.value) - for attribute in attributes_sequence - ) - for attribute in 
attributes_sequence: - name_length = len(attribute.identifier.token.value) - spaces_to_add = max_length - name_length - attribute.children[1].set_value( - " " * spaces_to_add + attribute.children[1].value - ) + self._align_attributes_sequence(attributes_sequence) attributes_sequence = [] + if attributes_sequence: + self._align_attributes_sequence(attributes_sequence) + + def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): + max_length = max( + len(attribute.identifier.token.value) + for attribute in attributes_sequence + ) + for attribute in attributes_sequence: + name_length = len(attribute.identifier.token.value) + spaces_to_add = max_length - name_length + attribute.children[1].set_value( + " " * spaces_to_add + attribute.children[1].value + ) + def _vertically_align_object_elems(self, rule: ObjectRule): max_length = max(len(elem.key.serialize()) for elem in rule.elements) for elem in rule.elements: key_length = len(elem.key.serialize()) - print(elem.key.serialize(), key_length) spaces_to_add = max_length - key_length @@ -218,17 +219,6 @@ def _vertically_align_object_elems(self, rule: ObjectRule): elem.children[1].set_value(" " * spaces_to_add + separator.value) - def _move_to_next_line(self, times: int = 1): - self._current_line += times - - def _increase_indent_level(self, times: int = 1): - self._current_indent_level += times - - def _decrease_indent_level(self, times: int = 1): - self._current_indent_level -= times - if self._current_indent_level < 0: - self._current_indent_level = 0 - def _build_newline( self, next_line_indent: int = 0, count: int = 1 ) -> NewLineOrCommentRule: @@ -247,16 +237,3 @@ def _deindent_last_line(self, times: int = 1): for i in range(times): if token.value.endswith(" " * self.options.indent_length): token.set_value(token.value[: -self.options.indent_length]) - - # def _build_meta(self, indent_level: int = 0, length: int = 0) -> Meta: - # result = Meta() - # result.empty = length == 0 - # result.line = 
self._current_line - # result.column = indent_level * self.options.indent_length - # # result.start_pos = - # # result.end_line = - # # result.end_column = - # # result.end_pos = - # # result.orig_expansion = - # # result.match_tree = - # return result From e32d3e3028b3f808c9c3f865135bffe25aaa1b5c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 17:50:06 +0100 Subject: [PATCH 15/42] add round-trip test suite --- test/round_trip/__init__.py | 0 .../hcl2_original/operator_precedence.tf | 15 ++ test/round_trip/hcl2_original/smoke.tf | 72 ++++++ .../hcl2_reconstructed/operator_precedence.tf | 15 ++ test/round_trip/hcl2_reconstructed/smoke.tf | 64 +++++ .../operator_precedence.json | 20 ++ test/round_trip/json_reserialized/smoke.json | 70 ++++++ .../json_serialized/operator_precedence.json | 20 ++ test/round_trip/json_serialized/smoke.json | 70 ++++++ .../special/operator_precedence.json | 20 ++ test/round_trip/test_round_trip.py | 224 ++++++++++++++++++ test/unit/__init__.py | 1 - test/unit/test_builder.py | 110 --------- test/unit/test_dict_transformer.py | 32 --- test/unit/test_hcl2_syntax.py | 193 --------------- test/unit/test_load.py | 57 ----- test/unit/test_load_with_meta.py | 23 -- test/unit/test_reconstruct_ast.py | 112 --------- test/unit/test_reconstruct_dict.py | 88 ------- 19 files changed, 590 insertions(+), 616 deletions(-) create mode 100644 test/round_trip/__init__.py create mode 100644 test/round_trip/hcl2_original/operator_precedence.tf create mode 100644 test/round_trip/hcl2_original/smoke.tf create mode 100644 test/round_trip/hcl2_reconstructed/operator_precedence.tf create mode 100644 test/round_trip/hcl2_reconstructed/smoke.tf create mode 100644 test/round_trip/json_reserialized/operator_precedence.json create mode 100644 test/round_trip/json_reserialized/smoke.json create mode 100644 test/round_trip/json_serialized/operator_precedence.json create mode 100644 test/round_trip/json_serialized/smoke.json create mode 100644 
test/round_trip/special/operator_precedence.json create mode 100644 test/round_trip/test_round_trip.py delete mode 100644 test/unit/__init__.py delete mode 100644 test/unit/test_builder.py delete mode 100644 test/unit/test_dict_transformer.py delete mode 100644 test/unit/test_hcl2_syntax.py delete mode 100644 test/unit/test_load.py delete mode 100644 test/unit/test_load_with_meta.py delete mode 100644 test/unit/test_reconstruct_ast.py delete mode 100644 test/unit/test_reconstruct_dict.py diff --git a/test/round_trip/__init__.py b/test/round_trip/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/round_trip/hcl2_original/operator_precedence.tf b/test/round_trip/hcl2_original/operator_precedence.tf new file mode 100644 index 00000000..f8351161 --- /dev/null +++ b/test/round_trip/hcl2_original/operator_precedence.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/round_trip/hcl2_original/smoke.tf b/test/round_trip/hcl2_original/smoke.tf new file mode 100644 index 00000000..d741a6ac --- /dev/null +++ b/test/round_trip/hcl2_original/smoke.tf @@ -0,0 +1,72 @@ + +block label1 label2 { + a = 5 + b = 1256.5 + c = 15 + (10 * 12) + d = (- a) + e = ( + a == b + ? true : false + ) + f = "${"this is a string"}" + g = 1 == 2 + h = { + k1 = 5, + k2 = 10 + , + "k3" = {k4 = "a"} + (5 + 5) = "d" + k5.attr.attr = "e" + } + i = [ + a, b + , + "c${aaa}", + d, + [1, 2, 3,], + f(a), + provider::func::aa(5) + + ] + j = func( + a, b + , c, + d ... 
+ + ) + k = a.b.5 + l = a.*.b + m = a[*][c].a.*.1 + + block b1 { + a = 1 + } +} + +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" + escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" +} + + +block { + route53_forwarding_rule_shares = { + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + "${forwarding_rule_key}" => { + aws_account_ids = [ + for account_name in var.route53_resolver_forwarding_rule_shares[ + forwarding_rule_key + ].aws_account_names : + module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] + ] + } + ... + if + substr(bucket_name, 0, 1) == "l" + } +} diff --git a/test/round_trip/hcl2_reconstructed/operator_precedence.tf b/test/round_trip/hcl2_reconstructed/operator_precedence.tf new file mode 100644 index 00000000..323759aa --- /dev/null +++ b/test/round_trip/hcl2_reconstructed/operator_precedence.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? 
d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/round_trip/hcl2_reconstructed/smoke.tf b/test/round_trip/hcl2_reconstructed/smoke.tf new file mode 100644 index 00000000..b5c54e96 --- /dev/null +++ b/test/round_trip/hcl2_reconstructed/smoke.tf @@ -0,0 +1,64 @@ +block label1 label2 { + a = 5 + b = 1256.5 + c = 15 + (10 * 12) + d = (-a) + e = (a == b ? true : false) + f = "${"this is a string"}" + g = 1 == 2 + h = { + k1 = 5, + k2 = 10, + "k3" = { + k4 = "a", + }, + (5 + 5) = "d", + k5.attr.attr = "e", + } + i = [ + a, + b, + "c${aaa}", + d, + [ + 1, + 2, + 3, + ], + f(a), + provider::func::aa(), + ] + j = func(a, b, c, d) + k = a.b.5 + l = a.*.b + m = a[*][c].a.*.1 + + block b1 { + a = 1 + } +} + + +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" + escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" +} + + +block { + route53_forwarding_rule_shares = { + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + "${forwarding_rule_key}" => { + aws_account_ids = [ + for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : + module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] + + ] + } ... 
if substr(bucket_name, 0, 1) == "l" + } +} diff --git a/test/round_trip/json_reserialized/operator_precedence.json b/test/round_trip/json_reserialized/operator_precedence.json new file mode 100644 index 00000000..5c611ea7 --- /dev/null +++ b/test/round_trip/json_reserialized/operator_precedence.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${var.env == \"prod\" && var.debug}", + "and_before_ternary": "${true && true ? 1 : 0}", + "mixed_arith_cmp": "${var.a + var.b * var.c > 10}", + "full_chain": "${a + b == c && d || e}", + "left_assoc_sub": "${a - b - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? d : e}", + "unary_precedence": "${!a && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/json_reserialized/smoke.json b/test/round_trip/json_reserialized/smoke.json new file mode 100644 index 00000000..48544f85 --- /dev/null +++ b/test/round_trip/json_reserialized/smoke.json @@ -0,0 +1,70 @@ +{ + "block": [ + { + "label1": { + "label2": { + "a": 5, + "b": 1256.5, + "c": "${15 + (10 * 12)}", + "d": "${(-a)}", + "e": "${(a == b ? 
true : false)}", + "f": "\"${\"this is a string\"}\"", + "g": "${1 == 2}", + "h": { + "k1": 5, + "k2": 10, + "\"k3\"": { + "k4": "\"a\"" + }, + "${(5 + 5)}": "\"d\"", + "k5.attr.attr": "\"e\"" + }, + "i": [ + "a", + "b", + "\"c${aaa}\"", + "d", + [ + 1, + 2, + 3 + ], + "${f(a)}", + "${provider::func::aa()}" + ], + "j": "${func(a, b, c, d)}", + "k": "${a.b.5}", + "l": "${a.*.b}", + "m": "${a[*][c].a.*.1}", + "block": [ + { + "b1": { + "a": 1, + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + }, + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + }, + { + "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... 
if substr(bucket_name, 0, 1) == \"l\"}}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/json_serialized/operator_precedence.json b/test/round_trip/json_serialized/operator_precedence.json new file mode 100644 index 00000000..5c611ea7 --- /dev/null +++ b/test/round_trip/json_serialized/operator_precedence.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${var.env == \"prod\" && var.debug}", + "and_before_ternary": "${true && true ? 1 : 0}", + "mixed_arith_cmp": "${var.a + var.b * var.c > 10}", + "full_chain": "${a + b == c && d || e}", + "left_assoc_sub": "${a - b - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? d : e}", + "unary_precedence": "${!a && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/json_serialized/smoke.json b/test/round_trip/json_serialized/smoke.json new file mode 100644 index 00000000..48544f85 --- /dev/null +++ b/test/round_trip/json_serialized/smoke.json @@ -0,0 +1,70 @@ +{ + "block": [ + { + "label1": { + "label2": { + "a": 5, + "b": 1256.5, + "c": "${15 + (10 * 12)}", + "d": "${(-a)}", + "e": "${(a == b ? 
true : false)}", + "f": "\"${\"this is a string\"}\"", + "g": "${1 == 2}", + "h": { + "k1": 5, + "k2": 10, + "\"k3\"": { + "k4": "\"a\"" + }, + "${(5 + 5)}": "\"d\"", + "k5.attr.attr": "\"e\"" + }, + "i": [ + "a", + "b", + "\"c${aaa}\"", + "d", + [ + 1, + 2, + 3 + ], + "${f(a)}", + "${provider::func::aa()}" + ], + "j": "${func(a, b, c, d)}", + "k": "${a.b.5}", + "l": "${a.*.b}", + "m": "${a[*][c].a.*.1}", + "block": [ + { + "b1": { + "a": 1, + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + }, + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + }, + { + "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/special/operator_precedence.json b/test/round_trip/special/operator_precedence.json new file mode 100644 index 00000000..35adb5bb --- /dev/null +++ b/test/round_trip/special/operator_precedence.json @@ -0,0 +1,20 @@ +{ + "locals": [ + { + "addition_1": "${((a + b) + c)}", + "addition_2": "${a + b}", + "addition_3": "${(a + b)}", + "eq_before_and": "${(var.env == \"prod\") && var.debug}", + "and_before_ternary": "${(true && true) ? 
1 : 0}", + "mixed_arith_cmp": "${(var.a + (var.b * var.c)) > 10}", + "full_chain": "${(((a + b) == c) && d) || e}", + "left_assoc_sub": "${(a - b) - c}", + "left_assoc_mul_div": "${(a * b) / c}", + "nested_ternary": "${(a ? b : c) ? d : e}", + "unary_precedence": "${(!a) && b}", + "neg_precedence": "${(-a) + b}", + "neg_parentheses": "${-(a + b)}", + "__is_block__": true + } + ] +} diff --git a/test/round_trip/test_round_trip.py b/test/round_trip/test_round_trip.py new file mode 100644 index 00000000..b43340b6 --- /dev/null +++ b/test/round_trip/test_round_trip.py @@ -0,0 +1,224 @@ +"""Round-trip tests for the HCL2 → JSON → HCL2 pipeline. + +Every test starts from the source HCL files in test/round_trip/hcl2_original/ and +runs the pipeline forward from there, comparing actuals against expected +outputs at each stage: + +1. HCL → JSON serialization (parse + transform + serialize) +2. JSON → JSON reserialization (serialize + deserialize + reserialize) +3. JSON → HCL reconstruction (serialize + deserialize + format + reconstruct) +4.
Full round-trip (HCL → JSON → HCL → JSON produces identical JSON) +""" + +import json +from enum import Enum +from pathlib import Path +from typing import List +from unittest import TestCase + +from hcl2 import parses +from hcl2.deserializer import BaseDeserializer +from hcl2.formatter import BaseFormatter +from hcl2.reconstructor import HCLReconstructor +from hcl2.transformer import RuleTransformer +from hcl2.utils import SerializationOptions + +ROUND_TRIP_DIR = Path(__file__).absolute().parent +HCL2_ORIGINAL_DIR = ROUND_TRIP_DIR / "hcl2_original" + +_STEP_DIRS = { + "hcl2_original": HCL2_ORIGINAL_DIR, + "hcl2_reconstructed": ROUND_TRIP_DIR / "hcl2_reconstructed", + "json_serialized": ROUND_TRIP_DIR / "json_serialized", + "json_reserialized": ROUND_TRIP_DIR / "json_reserialized", + "json_operator_precedence": ROUND_TRIP_DIR / "json_operator_precedence", +} + +_STEP_SUFFIXES = { + "hcl2_original": ".tf", + "hcl2_reconstructed": ".tf", + "json_serialized": ".json", + "json_reserialized": ".json", + "json_operator_precedence": ".json", +} + + +class SuiteStep(Enum): + ORIGINAL = "hcl2_original" + RECONSTRUCTED = "hcl2_reconstructed" + JSON_SERIALIZED = "json_serialized" + JSON_RESERIALIZED = "json_reserialized" + JSON_OPERATOR_PRECEDENCE = "json_operator_precedence" + + +def _get_suites() -> List[str]: + """ + Get a list of the test suites. + The name of a test suite is the name of a file in `test/round_trip/hcl2_original/` without the .tf suffix. + + Override SUITES to run a specific subset, e.g. SUITES = ["config"] + """ + return SUITES or sorted( + file.stem for file in HCL2_ORIGINAL_DIR.iterdir() if file.is_file() + ) + + +# set this to an arbitrary list of test suites to run, +# e.g.
`SUITES = ["smoke"]` to run the tests only for `test/round_trip/hcl2_original/smoke.tf` +SUITES: List[str] = [] + + +def _get_suite_file(suite_name: str, step: SuiteStep) -> Path: + """Return the path for a given suite name and pipeline step.""" + return _STEP_DIRS[step.value] / (suite_name + _STEP_SUFFIXES[step.value]) + + +def _parse_and_serialize(hcl_text: str, options: SerializationOptions = None) -> dict: + """Parse HCL text and serialize to a Python dict.""" + parsed_tree = parses(hcl_text) + rules = RuleTransformer().transform(parsed_tree) + if options: + return rules.serialize(options=options) + return rules.serialize() + + +def _deserialize_and_reserialize(serialized: dict) -> dict: + """Deserialize a Python dict back through the rule tree and reserialize.""" + deserializer = BaseDeserializer() + formatter = BaseFormatter() + deserialized = deserializer.load_python(serialized) + formatter.format_tree(deserialized) + return deserialized.serialize() + + +def _deserialize_and_reconstruct(serialized: dict) -> str: + """Deserialize a Python dict and reconstruct HCL text.""" + deserializer = BaseDeserializer() + formatter = BaseFormatter() + reconstructor = HCLReconstructor() + deserialized = deserializer.load_python(serialized) + formatter.format_tree(deserialized) + lark_tree = deserialized.to_lark() + return reconstructor.reconstruct(lark_tree) + + +class TestRoundTripSerialization(TestCase): + """Test HCL2 → JSON serialization: parse HCL, transform, serialize, compare with expected JSON.""" + + maxDiff = None + + def test_hcl_to_json(self): + for suite in _get_suites(): + yield self.check_hcl_to_json, suite + + def check_hcl_to_json(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_path = _get_suite_file(suite, SuiteStep.JSON_SERIALIZED) + + actual = _parse_and_serialize(hcl_path.read_text()) + expected = json.loads(json_path.read_text()) + + self.assertEqual( + actual, + expected, + f"HCL → JSON serialization mismatch for 
{suite}", + ) + + +class TestRoundTripReserialization(TestCase): + """Test JSON → JSON reserialization: parse HCL, serialize, deserialize, reserialize, compare with expected.""" + + maxDiff = None + + def test_json_reserialization(self): + for suite in _get_suites(): + yield self.check_json_reserialization, suite + + def check_json_reserialization(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) + + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reserialize(serialized) + + expected = json.loads(json_reserialized_path.read_text()) + self.assertEqual( + actual, + expected, + f"JSON reserialization mismatch for {suite}", + ) + + +class TestRoundTripReconstruction(TestCase): + """Test JSON → HCL reconstruction: parse HCL, serialize, deserialize, format, reconstruct, compare with expected HCL.""" + + maxDiff = None + + def test_json_to_hcl(self): + for suite in _get_suites(): + yield self.check_json_to_hcl, suite + + def check_json_to_hcl(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + hcl_reconstructed_path = _get_suite_file(suite, SuiteStep.RECONSTRUCTED) + + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reconstruct(serialized) + + expected = hcl_reconstructed_path.read_text() + self.assertMultiLineEqual( + actual, + expected, + f"HCL reconstruction mismatch for {suite}", + ) + + +class TestRoundTripFull(TestCase): + """Test full round-trip: HCL → JSON → HCL → JSON should produce matching JSON.""" + + maxDiff = None + + def test_full_round_trip(self): + for suite in _get_suites(): + yield self.check_full_round_trip, suite + + def check_full_round_trip(self, suite: str): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + original_hcl = hcl_path.read_text() + + # Forward: HCL → JSON + serialized = _parse_and_serialize(original_hcl) + + # Reconstruct: JSON → 
HCL + reconstructed_hcl = _deserialize_and_reconstruct(serialized) + + # Re-parse: reconstructed HCL → JSON + reserialized = _parse_and_serialize(reconstructed_hcl) + + self.assertEqual( + reserialized, + serialized, + f"Full round-trip mismatch for {suite}: " + f"HCL → JSON → HCL → JSON did not produce identical JSON", + ) + + +class TestOperatorPrecedence(TestCase): + """Test that parsed expressions correctly represent operator precedence. + + Serializes with force_operation_parentheses=True so that implicit + precedence becomes explicit parentheses in the output. + See: https://github.com/amplify-education/python-hcl2/issues/248 + """ + + maxDiff = None + _OPTIONS = SerializationOptions(force_operation_parentheses=True) + + def test_operator_precedence(self): + hcl_path = _get_suite_file("operator_precedence", SuiteStep.ORIGINAL) + json_path = SPECIAL_DIR / "operator_precedence.json" + + actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) + expected = json.loads(json_path.read_text()) + + self.assertEqual(actual, expected) diff --git a/test/unit/__init__.py b/test/unit/__init__.py deleted file mode 100644 index c497b297..00000000 --- a/test/unit/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Unit tests -- tests that verify the code of this egg in isolation""" diff --git a/test/unit/test_builder.py b/test/unit/test_builder.py deleted file mode 100644 index 2ce0cfed..00000000 --- a/test/unit/test_builder.py +++ /dev/null @@ -1,110 +0,0 @@ -# pylint:disable=C0116 - -"""Test building an HCL file from scratch""" - -from pathlib import Path -from unittest import TestCase - -import hcl2 -import hcl2.builder - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -JSON_DIR = HELPERS_DIR / "terraform-config-json" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] - - -class TestBuilder(TestCase): - """Test building a variety of hcl files""" - - # print any 
differences fully to the console - maxDiff = None - - def test_build_blocks_tf(self): - nested_builder = hcl2.Builder() - nested_builder.block("nested_block_1", ["a"], foo="bar") - nested_builder.block("nested_block_1", ["a", "b"], bar="foo") - nested_builder.block("nested_block_1", foobar="barfoo") - nested_builder.block("nested_block_2", barfoo="foobar") - - builder = hcl2.Builder() - builder.block("block", a=1) - builder.block("block", ["label"], __nested_builder__=nested_builder, b=2) - - self.compare_filenames(builder, "blocks.tf") - - def test_build_escapes_tf(self): - builder = hcl2.Builder() - - builder.block("block", ["block_with_newlines"], a="line1\nline2") - - self.compare_filenames(builder, "escapes.tf") - - def test_locals_embdedded_condition_tf(self): - builder = hcl2.Builder() - - builder.block( - "locals", - terraform={ - "channels": "${(local.running_in_ci ? local.ci_channels : local.local_channels)}", - "authentication": [], - "foo": None, - }, - ) - - self.compare_filenames(builder, "locals_embedded_condition.tf") - - def test_locals_embedded_function_tf(self): - builder = hcl2.Builder() - - function_test = ( - "${var.basename}-${var.forwarder_function_name}_" - '${md5("${var.vpc_id}${data.aws_region.current.name}")}' - ) - builder.block("locals", function_test=function_test) - - self.compare_filenames(builder, "locals_embedded_function.tf") - - def test_locals_embedded_interpolation_tf(self): - builder = hcl2.Builder() - - attributes = { - "simple_interpolation": "prefix:${var.foo}-suffix", - "embedded_interpolation": "(long substring without interpolation); " - '${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo', - "deeply_nested_interpolation": 'prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}', - "escaped_interpolation": "prefix:$${aws:username}-suffix", - "simple_and_escaped": '${"bar"}$${baz:bat}', - "simple_and_escaped_reversed": '$${baz:bat}${"bar"}', - "nested_escaped": 'bar-${"$${baz:bat}"}', - } 
- - builder.block("locals", **attributes) - - self.compare_filenames(builder, "string_interpolations.tf") - - def test_provider_function_tf(self): - builder = hcl2.Builder() - - builder.block( - "locals", - name2='${provider::test2::test("a")}', - name3='${test("a")}', - ) - - self.compare_filenames(builder, "provider_function.tf") - - def compare_filenames(self, builder: hcl2.Builder, filename: str): - hcl_dict = builder.build() - hcl_ast = hcl2.reverse_transform(hcl_dict) - hcl_content_built = hcl2.writes(hcl_ast) - - hcl_path = (HCL2_DIR / filename).absolute() - with hcl_path.open("r") as hcl_file: - hcl_file_content = hcl_file.read() - self.assertMultiLineEqual( - hcl_content_built, - hcl_file_content, - f"file {filename} does not match its programmatically built version.", - ) diff --git a/test/unit/test_dict_transformer.py b/test/unit/test_dict_transformer.py deleted file mode 100644 index baad5ba9..00000000 --- a/test/unit/test_dict_transformer.py +++ /dev/null @@ -1,32 +0,0 @@ -# pylint:disable=C0114,C0116,C0103,W0612 - -from unittest import TestCase - -from hcl2.dict_transformer import DictTransformer - - -class TestDictTransformer(TestCase): - """Test behaviour of hcl2.transformer.DictTransformer class""" - - @staticmethod - def build_dict_transformer(with_meta: bool = False) -> DictTransformer: - return DictTransformer(with_meta) - - def test_to_string_dollar(self): - string_values = { - '"bool"': "bool", - '"number"': "number", - '"string"': "string", - "${value_1}": "${value_1}", - '"value_2': '${"value_2}', - 'value_3"': '${value_3"}', - '"value_4"': "value_4", - "value_5": "${value_5}", - } - - dict_transformer = self.build_dict_transformer() - - for value, expected in string_values.items(): - actual = dict_transformer.to_string_dollar(value) - - self.assertEqual(actual, expected) diff --git a/test/unit/test_hcl2_syntax.py b/test/unit/test_hcl2_syntax.py deleted file mode 100644 index 96113df3..00000000 --- a/test/unit/test_hcl2_syntax.py +++ 
/dev/null @@ -1,193 +0,0 @@ -# pylint:disable=C0114,C0116,C0103,W0612 - -import string # pylint:disable=W4901 # https://stackoverflow.com/a/16651393 -from unittest import TestCase - -from test.helpers.hcl2_helper import Hcl2Helper - -from lark import UnexpectedToken, UnexpectedCharacters - - -class TestHcl2Syntax(Hcl2Helper, TestCase): - """Test parsing individual elements of HCL2 syntax""" - - def test_argument(self): - syntax = self.build_argument("identifier", '"expression"') - result = self.load_to_dict(syntax) - self.assertDictEqual(result, {"identifier": "expression"}) - - def test_identifier_starts_with_digit(self): - for i in range(0, 10): - argument = self.build_argument(f"{i}id") - with self.assertRaises(UnexpectedToken) as e: - self.load_to_dict(argument) - assert ( - f"Unexpected token Token('DECIMAL', '{i}') at line 1, column 1" - in str(e) - ) - - def test_identifier_starts_with_special_chars(self): - chars = string.punctuation.replace("_", "") - for i in chars: - argument = self.build_argument(f"{i}id") - with self.assertRaises((UnexpectedToken, UnexpectedCharacters)) as e: - self.load_to_dict(argument) - - def test_identifier_contains_special_chars(self): - chars = string.punctuation.replace("_", "").replace("-", "") - for i in chars: - argument = self.build_argument(f"identifier{i}") - with self.assertRaises((UnexpectedToken, UnexpectedCharacters)) as e: - self.load_to_dict(argument) - - def test_identifier(self): - argument = self.build_argument("_-__identifier_-1234567890-_") - self.load_to_dict(argument) - - def test_block_no_labels(self): - block = """ - block { - } - """ - result = self.load_to_dict(block) - self.assertDictEqual(result, {"block": [{}]}) - - def test_block_single_label(self): - block = """ - block "label" { - } - """ - result = self.load_to_dict(block) - self.assertDictEqual(result, {"block": [{"label": {}}]}) - - def test_block_multiple_labels(self): - block = """ - block "label1" "label2" "label3" { - } - """ - result = 
self.load_to_dict(block) - self.assertDictEqual( - result, {"block": [{"label1": {"label2": {"label3": {}}}}]} - ) - - def test_unary_operation(self): - operations = [ - ("identifier = -10", {"identifier": -10}), - ("identifier = !true", {"identifier": "${!true}"}), - ] - for hcl, dict_ in operations: - result = self.load_to_dict(hcl) - self.assertDictEqual(result, dict_) - - def test_tuple(self): - tuple_ = """tuple = [ - identifier, - "string", 100, - true == false, - 5 + 5, function(), - ]""" - result = self.load_to_dict(tuple_) - self.assertDictEqual( - result, - { - "tuple": [ - "${identifier}", - "string", - 100, - "${true == false}", - "${5 + 5}", - "${function()}", - ] - }, - ) - - def test_object(self): - object_ = """object = { - key1: identifier, key2: "string", key3: 100, - key4: true == false // comment - key5: 5 + 5, key6: function(), - key7: value == null ? 1 : 0 - }""" - result = self.load_to_dict(object_) - self.assertDictEqual( - result, - { - "object": { - "key1": "${identifier}", - "key2": "string", - "key3": 100, - "key4": "${true == false}", - "key5": "${5 + 5}", - "key6": "${function()}", - "key7": "${value == null ? 
1 : 0}", - } - }, - ) - - def test_function_call_and_arguments(self): - calls = { - "r = function()": {"r": "${function()}"}, - "r = function(arg1, arg2)": {"r": "${function(arg1, arg2)}"}, - """r = function( - arg1, arg2, - arg3, - ) - """: { - "r": "${function(arg1, arg2, arg3)}" - }, - } - - for call, expected in calls.items(): - result = self.load_to_dict(call) - self.assertDictEqual(result, expected) - - def test_index(self): - indexes = { - "r = identifier[10]": {"r": "${identifier[10]}"}, - "r = identifier.20": { - "r": "${identifier[2]}" - }, # TODO debug why `20` is parsed to `2` - """r = identifier["key"]""": {"r": '${identifier["key"]}'}, - """r = identifier.key""": {"r": "${identifier.key}"}, - } - for call, expected in indexes.items(): - result = self.load_to_dict(call) - self.assertDictEqual(result, expected) - - def test_e_notation(self): - literals = { - "var = 3e4": {"var": "${3e4}"}, - "var = 3.5e5": {"var": "${3.5e5}"}, - "var = -3e6": {"var": "${-3e6}"}, - "var = -2.3e4": {"var": "${-2.3e4}"}, - "var = -5e-2": {"var": "${-5e-2}"}, - "var = -6.1e-3": {"var": "${-6.1e-3}"}, - } - for actual, expected in literals.items(): - result = self.load_to_dict(actual) - self.assertDictEqual(result, expected) - - def test_null(self): - identifier = "var = null" - - expected = {"var": None} - - result = self.load_to_dict(identifier) - self.assertDictEqual(result, expected) - - def test_expr_term_parenthesis(self): - literals = { - "a = 1 * 2 + 3": {"a": "${1 * 2 + 3}"}, - "b = 1 * (2 + 3)": {"b": "${1 * (2 + 3)}"}, - "c = (1 * (2 + 3))": {"c": "${(1 * (2 + 3))}"}, - "conditional = value == null ? 1 : 0": { - "conditional": "${value == null ? 1 : 0}" - }, - "conditional = (value == null ? 1 : 0)": { - "conditional": "${(value == null ? 
1 : 0)}" - }, - } - - for actual, expected in literals.items(): - result = self.load_to_dict(actual) - self.assertDictEqual(result, expected) diff --git a/test/unit/test_load.py b/test/unit/test_load.py deleted file mode 100644 index f9be8845..00000000 --- a/test/unit/test_load.py +++ /dev/null @@ -1,57 +0,0 @@ -""" Test parsing a variety of hcl files""" - -import json -from pathlib import Path -from unittest import TestCase - -from hcl2.parser import PARSER_FILE, parser -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -JSON_DIR = HELPERS_DIR / "terraform-config-json" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] - - -class TestLoad(TestCase): - """Test parsing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_load_terraform(self): - """Test parsing a set of hcl2 files and force recreating the parser file""" - - # create a parser to make sure that the parser file is created - parser() - - # delete the parser file to force it to be recreated - PARSER_FILE.unlink() - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def test_load_terraform_from_cache(self): - """Test parsing a set of hcl2 files from a cached parser file""" - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def check_terraform(self, hcl_path_str: str): - """Loads a single hcl2 file, parses it and compares with the expected json""" - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - if not json_path.exists(): - assert ( - False - ), f"Expected json equivalent of the hcl file doesn't exist {json_path}" - - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - try: - hcl2_dict = hcl2.load(hcl_file) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - 
json_dict = json.load(json_file) - self.assertDictEqual( - hcl2_dict, json_dict, f"\n\nfailed comparing {hcl_path_str}" - ) diff --git a/test/unit/test_load_with_meta.py b/test/unit/test_load_with_meta.py deleted file mode 100644 index b081844e..00000000 --- a/test/unit/test_load_with_meta.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Test parsing hcl files with meta parameters""" - -import json -from pathlib import Path -from unittest import TestCase - -import hcl2 - -TEST_WITH_META_DIR = Path(__file__).absolute().parent.parent / "helpers" / "with-meta" -TF_FILE_PATH = TEST_WITH_META_DIR / "data_sources.tf" -JSON_FILE_PATH = TEST_WITH_META_DIR / "data_sources.json" - - -class TestLoadWithMeta(TestCase): - """Test parsing hcl files with meta parameters""" - - def test_load_terraform_meta(self): - """Test load() with with_meta flag set to true.""" - with TF_FILE_PATH.open("r") as tf_file, JSON_FILE_PATH.open("r") as json_file: - self.assertDictEqual( - json.load(json_file), - hcl2.load(tf_file, with_meta=True), - ) diff --git a/test/unit/test_reconstruct_ast.py b/test/unit/test_reconstruct_ast.py deleted file mode 100644 index b9545def..00000000 --- a/test/unit/test_reconstruct_ast.py +++ /dev/null @@ -1,112 +0,0 @@ -""" Test reconstructing hcl files""" - -import json -from pathlib import Path -from unittest import TestCase - -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] -JSON_DIR = HELPERS_DIR / "terraform-config-json" - - -class TestReconstruct(TestCase): - """Test reconstructing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_write_terraform(self): - """Test reconstructing a set of hcl2 files, to make sure they parse to the same structure""" - for hcl_path in HCL2_FILES: - yield self.check_terraform, hcl_path - - def test_write_terraform_exact(self): - """ 
- Test reconstructing a set of hcl2 files, to make sure they - reconstruct exactly the same, including whitespace. - """ - - # the reconstruction process is not precise, so some files do not - # reconstruct their whitespace exactly the same, but they are - # syntactically equivalent. This list is a target for further - # improvements to the whitespace handling of the reconstruction - # algorithm. - inexact_files = [ - # the reconstructor loses commas on the last element in an array, - # even if they're in the input file - "iam.tf", - "variables.tf", - # the reconstructor doesn't preserve indentation within comments - # perfectly - "multiline_expressions.tf", - # the reconstructor doesn't preserve the line that a ternary is - # broken on. - "route_table.tf", - ] - - for hcl_path in HCL2_FILES: - if hcl_path not in inexact_files: - yield self.check_whitespace, hcl_path - - def check_terraform(self, hcl_path_str: str): - """ - Loads a single hcl2 file, parses it, reconstructs it, - parses the reconstructed file, and compares with the expected json - """ - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - hcl_file_content = hcl_file.read() - try: - hcl_ast = hcl2.parses(hcl_file_content) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - assert ( - False - ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}" - - try: - hcl2_dict = hcl2.loads(hcl_reconstructed) - except Exception as exc: - assert ( - False - ), f"failed to tokenize terraform in file reconstructed from `{hcl_path_str}`: {exc}" - - json_dict = json.load(json_file) - self.assertDictEqual( - hcl2_dict, - json_dict, - f"failed comparing {hcl_path_str} with reconstructed version", - ) - - def check_whitespace(self, 
hcl_path_str: str): - """Tests that the reconstructed file matches the original file exactly.""" - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - with hcl_path.open("r") as hcl_file: - hcl_file_content = hcl_file.read() - try: - hcl_ast = hcl2.parses(hcl_file_content) - except Exception as exc: - assert False, f"failed to tokenize terraform in `{hcl_path_str}`: {exc}" - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - assert ( - False - ), f"failed to reconstruct terraform in `{hcl_path_str}`: {exc}" - - self.assertMultiLineEqual( - hcl_reconstructed, - hcl_file_content, - f"file {hcl_path_str} does not match its reconstructed version \ - exactly. this is usually whitespace related.", - ) diff --git a/test/unit/test_reconstruct_dict.py b/test/unit/test_reconstruct_dict.py deleted file mode 100644 index a65e8429..00000000 --- a/test/unit/test_reconstruct_dict.py +++ /dev/null @@ -1,88 +0,0 @@ -""" Test reconstructing hcl files""" - -import json -import traceback -from pathlib import Path -from unittest import TestCase - -import hcl2 - - -HELPERS_DIR = Path(__file__).absolute().parent.parent / "helpers" -HCL2_DIR = HELPERS_DIR / "terraform-config" -HCL2_FILES = [str(file.relative_to(HCL2_DIR)) for file in HCL2_DIR.iterdir()] -JSON_DIR = HELPERS_DIR / "terraform-config-json" - - -class TestReconstruct(TestCase): - """Test reconstructing a variety of hcl files""" - - # print any differences fully to the console - maxDiff = None - - def test_write_terraform(self): - """Test reconstructing a set of hcl2 files, to make sure they parse to the same structure""" - - # the reconstruction process is not precise, so some files do not - # reconstruct any embedded HCL expressions exactly the same. 
this - # list captures those, and should be manually inspected regularly to - # ensure that files remain syntactically equivalent - inexact_files = [ - # one level of interpolation is stripped from this file during - # reconstruction, since we don't have a way to distinguish it from - # a complex HCL expression. the output parses to the same value - # though - "multi_level_interpolation.tf", - ] - - for hcl_path in HCL2_FILES: - if hcl_path not in inexact_files: - yield self.check_terraform, hcl_path - - def check_terraform(self, hcl_path_str: str): - """ - Loads a single hcl2 file, parses it, reconstructs it, - parses the reconstructed file, and compares with the expected json - """ - hcl_path = (HCL2_DIR / hcl_path_str).absolute() - json_path = JSON_DIR / hcl_path.relative_to(HCL2_DIR).with_suffix(".json") - with hcl_path.open("r") as hcl_file, json_path.open("r") as json_file: - try: - hcl2_dict_correct = hcl2.load(hcl_file) - except Exception as exc: - raise RuntimeError( - f"failed to tokenize 'correct' terraform in " - f"`{hcl_path_str}`: {traceback.format_exc()}" - ) from exc - - json_dict = json.load(json_file) - - try: - hcl_ast = hcl2.reverse_transform(json_dict) - except Exception as exc: - raise RuntimeError( - f"failed to reverse transform HCL from " - f"`{json_path.name}`: {traceback.format_exc()}" - ) from exc - - try: - hcl_reconstructed = hcl2.writes(hcl_ast) - except Exception as exc: - raise RuntimeError( - f"failed to reconstruct terraform from AST from " - f"`{json_path.name}`: {traceback.format_exc()}" - ) from exc - - try: - hcl2_dict_reconstructed = hcl2.loads(hcl_reconstructed) - except Exception as exc: - raise RuntimeError( - f"failed to tokenize 'reconstructed' terraform from AST from " - f"`{json_path.name}`: {exc}, \n{hcl_reconstructed}" - ) from exc - - self.assertDictEqual( - hcl2_dict_reconstructed, - hcl2_dict_correct, - f"failed comparing {hcl_path_str} with reconstructed version from {json_path.name}", - ) From 
e32a5407f3cf4e0e052dfd10456a031ba7b4816c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:14:38 +0100 Subject: [PATCH 16/42] removed old unused file --- hcl2/rules/tree.py | 106 --------------------------------------------- 1 file changed, 106 deletions(-) delete mode 100644 hcl2/rules/tree.py diff --git a/hcl2/rules/tree.py b/hcl2/rules/tree.py deleted file mode 100644 index e39d2077..00000000 --- a/hcl2/rules/tree.py +++ /dev/null @@ -1,106 +0,0 @@ -from abc import ABC, abstractmethod -from typing import List, Optional, Any, Union - - -class LarkNode(ABC): - """Base class for all nodes in the tree""" - - def __init__(self, index: int = -1, parent: Optional["Node"] = None): - self._index = index - self._parent = parent - - @property - def parent(self) -> Optional["Node"]: - return self._parent - - @property - def index(self) -> int: - return self._index - - def set_parent(self, parent: "Node"): - self._parent = parent - - def set_index(self, index: int): - self._index = index - - @abstractmethod - def serialize(self, options=None) -> Any: - pass - - @abstractmethod - def to_lark(self) -> Any: - """Convert back to Lark representation""" - pass - - def is_leaf(self) -> bool: - """Check if this is a leaf node (atomic token)""" - return isinstance(self, LeafNode) - - def is_sequence(self) -> bool: - """Check if this is a token sequence node""" - return isinstance(self, SequenceNode) - - def is_internal(self) -> bool: - """Check if this is an internal node (grammar rule)""" - return isinstance(self, InternalNode) - - def is_atomic(self) -> bool: - """Check if this represents an atomic value (leaf or sequence)""" - return self.is_leaf() or self.is_sequence() - - -class LarkLeaf(Node, ABC): - """""" - - def __init__(self, value: Any, index: int = -1, parent: Optional[TreeNode] = None): - super().__init__(index, parent) - self._value = value - - @property - def value(self) -> Any: - return self._value - - def serialize(self, options=None) -> Any: - 
return self._value - - -class InternalNode(Node): - def __init__( - self, children: List[Node], index: int = -1, parent: Optional[Node] = None - ): - super().__init__(index, parent) - self._children = children or [] - - # Set parent and index for all children - for i, child in enumerate(self._children): - if child is not None: - child.set_parent(self) - child.set_index(i) - - @property - def children(self) -> List[Node]: - return self._children - - def add_child(self, child: Node): - """Add a child to this internal node""" - child.set_parent(self) - child.set_index(len(self._children)) - self._children.append(child) - - def remove_child(self, index: int) -> Optional[Node]: - """Remove child at given index""" - if 0 <= index < len(self._children): - child = self._children.pop(index) - if child: - child.set_parent(None) - # Update indices for remaining children - for i in range(index, len(self._children)): - if self._children[i]: - self._children[i].set_index(i) - return child - return None - - @abstractmethod - def rule_name(self) -> str: - """The name of the grammar rule this represents""" - pass From 210e3cd2c354670b7ee3e0fde217d1862d4d26ba Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:15:24 +0100 Subject: [PATCH 17/42] fix - dont add spaces add the end of the line (before newline rule); remove unused import --- hcl2/reconstructor.py | 4 ++-- hcl2/rules/abstract.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index e92f7040..1b5260ac 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -87,8 +87,8 @@ def _should_add_space_before( # Space after commas in tuples and function arguments... if self._last_token_name == tokens.COMMA.lark_name(): - # ... except for last comma - if token_type == tokens.RSQB.lark_name(): + # ... 
except before closing brackets or newlines + if token_type in (tokens.RSQB.lark_name(), "NL_OR_COMMENT"): return False return True diff --git a/hcl2/rules/abstract.py b/hcl2/rules/abstract.py index a494d901..316c777a 100644 --- a/hcl2/rules/abstract.py +++ b/hcl2/rules/abstract.py @@ -1,8 +1,7 @@ from abc import ABC, abstractmethod -from typing import Any, Union, List, Optional, Tuple, Callable +from typing import Any, Union, List, Optional, Callable from lark import Token, Tree -from lark.exceptions import VisitError from lark.tree import Meta from hcl2.utils import SerializationOptions, SerializationContext From b235ec9845b3caea2b6218f8f29608567c42a240 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:27:24 +0100 Subject: [PATCH 18/42] use unittest subTest to fix noise in test results ("The type of the None singleton"); fix whitespaces in `test/round_trip/hcl2_reconstructed/smoke.tf` --- test/round_trip/hcl2_reconstructed/smoke.tf | 8 +- test/round_trip/test_round_trip.py | 107 +++++++++----------- 2 files changed, 53 insertions(+), 62 deletions(-) diff --git a/test/round_trip/hcl2_reconstructed/smoke.tf b/test/round_trip/hcl2_reconstructed/smoke.tf index b5c54e96..8f17d6d6 100644 --- a/test/round_trip/hcl2_reconstructed/smoke.tf +++ b/test/round_trip/hcl2_reconstructed/smoke.tf @@ -32,7 +32,7 @@ block label1 label2 { k = a.b.5 l = a.*.b m = a[*][c].a.*.1 - + block b1 { a = 1 } @@ -52,12 +52,12 @@ block label1 label3 { block { route53_forwarding_rule_shares = { - for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : "${forwarding_rule_key}" => { aws_account_ids = [ - for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : + for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : 
module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] - + ] } ... if substr(bucket_name, 0, 1) == "l" } diff --git a/test/round_trip/test_round_trip.py b/test/round_trip/test_round_trip.py index b43340b6..93fcd111 100644 --- a/test/round_trip/test_round_trip.py +++ b/test/round_trip/test_round_trip.py @@ -26,12 +26,13 @@ ROUND_TRIP_DIR = Path(__file__).absolute().parent HCL2_ORIGINAL_DIR = ROUND_TRIP_DIR / "hcl2_original" +SPECIAL_DIR = ROUND_TRIP_DIR / "special" + _STEP_DIRS = { "hcl2_original": HCL2_ORIGINAL_DIR, "hcl2_reconstructed": ROUND_TRIP_DIR / "hcl2_reconstructed", "json_serialized": ROUND_TRIP_DIR / "json_serialized", "json_reserialized": ROUND_TRIP_DIR / "json_reserialized", - "json_operator_precedence": ROUND_TRIP_DIR / "json_operator_precedence", } _STEP_SUFFIXES = { @@ -39,7 +40,6 @@ "hcl2_reconstructed": ".tf", "json_serialized": ".json", "json_reserialized": ".json", - "json_operator_precedence": ".json", } @@ -48,7 +48,6 @@ class SuiteStep(Enum): RECONSTRUCTED = "hcl2_reconstructed" JSON_SERIALIZED = "json_serialized" JSON_RESERIALIZED = "json_reserialized" - JSON_OPERATOR_PRECEDENCE = "json_operator_precedence" def _get_suites() -> List[str]: @@ -109,20 +108,18 @@ class TestRoundTripSerialization(TestCase): def test_hcl_to_json(self): for suite in _get_suites(): - yield self.check_hcl_to_json, suite - - def check_hcl_to_json(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - json_path = _get_suite_file(suite, SuiteStep.JSON_SERIALIZED) + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_path = _get_suite_file(suite, SuiteStep.JSON_SERIALIZED) - actual = _parse_and_serialize(hcl_path.read_text()) - expected = json.loads(json_path.read_text()) + actual = _parse_and_serialize(hcl_path.read_text()) + expected = json.loads(json_path.read_text()) - self.assertEqual( - actual, - expected, - f"HCL → JSON serialization mismatch for {suite}", - ) + 
self.assertEqual( + actual, + expected, + f"HCL → JSON serialization mismatch for {suite}", + ) class TestRoundTripReserialization(TestCase): @@ -132,21 +129,19 @@ class TestRoundTripReserialization(TestCase): def test_json_reserialization(self): for suite in _get_suites(): - yield self.check_json_reserialization, suite + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) - def check_json_reserialization(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reserialize(serialized) - serialized = _parse_and_serialize(hcl_path.read_text()) - actual = _deserialize_and_reserialize(serialized) - - expected = json.loads(json_reserialized_path.read_text()) - self.assertEqual( - actual, - expected, - f"JSON reserialization mismatch for {suite}", - ) + expected = json.loads(json_reserialized_path.read_text()) + self.assertEqual( + actual, + expected, + f"JSON reserialization mismatch for {suite}", + ) class TestRoundTripReconstruction(TestCase): @@ -156,21 +151,19 @@ class TestRoundTripReconstruction(TestCase): def test_json_to_hcl(self): for suite in _get_suites(): - yield self.check_json_to_hcl, suite - - def check_json_to_hcl(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - hcl_reconstructed_path = _get_suite_file(suite, SuiteStep.RECONSTRUCTED) + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + hcl_reconstructed_path = _get_suite_file(suite, SuiteStep.RECONSTRUCTED) - serialized = _parse_and_serialize(hcl_path.read_text()) - actual = _deserialize_and_reconstruct(serialized) + serialized = _parse_and_serialize(hcl_path.read_text()) + actual = _deserialize_and_reconstruct(serialized) - expected = 
hcl_reconstructed_path.read_text() - self.assertMultiLineEqual( - actual, - expected, - f"HCL reconstruction mismatch for {suite}", - ) + expected = hcl_reconstructed_path.read_text() + self.assertMultiLineEqual( + actual, + expected, + f"HCL reconstruction mismatch for {suite}", + ) class TestRoundTripFull(TestCase): @@ -180,27 +173,25 @@ class TestRoundTripFull(TestCase): def test_full_round_trip(self): for suite in _get_suites(): - yield self.check_full_round_trip, suite - - def check_full_round_trip(self, suite: str): - hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - original_hcl = hcl_path.read_text() + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + original_hcl = hcl_path.read_text() - # Forward: HCL → JSON - serialized = _parse_and_serialize(original_hcl) + # Forward: HCL → JSON + serialized = _parse_and_serialize(original_hcl) - # Reconstruct: JSON → HCL - reconstructed_hcl = _deserialize_and_reconstruct(serialized) + # Reconstruct: JSON → HCL + reconstructed_hcl = _deserialize_and_reconstruct(serialized) - # Re-parse: reconstructed HCL → JSON - reserialized = _parse_and_serialize(reconstructed_hcl) + # Reparse: reconstructed HCL → JSON + reserialized = _parse_and_serialize(reconstructed_hcl) - self.assertEqual( - reserialized, - serialized, - f"Full round-trip mismatch for {suite}: " - f"HCL → JSON → HCL → JSON did not produce identical JSON", - ) + self.assertEqual( + reserialized, + serialized, + f"Full round-trip mismatch for {suite}: " + f"HCL → JSON → HCL → JSON did not produce identical JSON", + ) class TestOperatorPrecedence(TestCase): From a3fe3267dc0361d3cf78ab5d8bc201c0e53d90ab Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 18:39:02 +0100 Subject: [PATCH 19/42] remove files for WIP features --- hcl2/editor.py | 77 -------------- hcl2/processor.py | 258 ---------------------------------------------- 2 files changed, 335 deletions(-) delete mode 100644 hcl2/editor.py delete mode 
100644 hcl2/processor.py diff --git a/hcl2/editor.py b/hcl2/editor.py deleted file mode 100644 index 9efce08f..00000000 --- a/hcl2/editor.py +++ /dev/null @@ -1,77 +0,0 @@ -import dataclasses -from copy import copy, deepcopy -from typing import List, Optional, Set, Tuple - -from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.base import BlockRule, StartRule - - -@dataclasses.dataclass -class TreePathElement: - - name: str - index: int = 0 - - -@dataclasses.dataclass -class TreePath: - - elements: List[TreePathElement] = dataclasses.field(default_factory=list) - - @classmethod - def build(cls, elements: List[Tuple[str, Optional[int]] | str]): - results = [] - for element in elements: - if isinstance(element, tuple): - if len(element) == 1: - result = TreePathElement(element[0], 0) - else: - result = TreePathElement(*element) - else: - result = TreePathElement(element, 0) - - results.append(result) - - return cls(results) - - def __iter__(self): - return self.elements.__iter__() - - def __len__(self): - return self.elements.__len__() - - -class Editor: - def __init__(self, rules_tree: LarkRule): - self.rules_tree = rules_tree - - @classmethod - def _find_one(cls, rules_tree: LarkRule, path_element: TreePathElement) -> LarkRule: - return cls._find_all(rules_tree, path_element.name)[path_element.index] - - @classmethod - def _find_all(cls, rules_tree: LarkRule, rule_name: str) -> List[LarkRule]: - children = [] - print("rule", rules_tree) - print("rule children", rules_tree.children) - for child in rules_tree.children: - if isinstance(child, LarkRule) and child.lark_name() == rule_name: - children.append(child) - - return children - - def find_by_path(self, path: TreePath, rule_name: str) -> List[LarkRule]: - path = deepcopy(path.elements) - - current_rule = self.rules_tree - while len(path) > 0: - current_path, *path = path - print(current_path, path) - current_rule = self._find_one(current_rule, current_path) - - return 
self._find_all(current_rule, rule_name) - - # def visit(self, path: TreePath) -> "Editor": - # - # while len(path) > 1: - # current = diff --git a/hcl2/processor.py b/hcl2/processor.py deleted file mode 100644 index b854aff5..00000000 --- a/hcl2/processor.py +++ /dev/null @@ -1,258 +0,0 @@ -from copy import copy, deepcopy -from typing import ( - List, - Optional, - Union, - Callable, - Any, - Tuple, - Generic, - TypeVar, - cast, - Generator, -) - -from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement -from hcl2.rule_transformer.rules.base import BlockRule, AttributeRule -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule - -T = TypeVar("T", bound=LarkRule) - - -class RulesProcessor(Generic[T]): - """""" - - @classmethod - def _traverse( - cls, - node: T, - predicate: Callable[[T], bool], - current_depth: int = 0, - max_depth: Optional[int] = None, - ) -> List["RulesProcessor"]: - - results = [] - - if predicate(node): - results.append(cls(node)) - - if max_depth is not None and current_depth >= max_depth: - return results - - for child in node.children: - if child is None or not isinstance(child, LarkRule): - continue - - child_results = cls._traverse( - child, - predicate, - current_depth + 1, - max_depth, - ) - results.extend(child_results) - - return results - - def __init__(self, node: LarkRule): - self.node = node - - @property - def siblings(self): - if self.node.parent is None: - return None - return self.node.parent.children - - @property - def next_siblings(self): - if self.node.parent is None: - return None - return self.node.parent.children[self.node.index + 1 :] - - @property - def previous_siblings(self): - if self.node.parent is None: - return None - return self.node.parent.children[: self.node.index - 1] - - def walk(self) -> Generator[Tuple["RulesProcessor", List["RulesProcessor"]]]: - child_processors = [self.__class__(child) for child in self.node.children] - yield self, child_processors - for processor in 
child_processors: - if isinstance(processor.node, LarkRule): - for result in processor.walk(): - yield result - - def find_block( - self, - labels: List[str], - exact_match: bool = True, - max_depth: Optional[int] = None, - ) -> "RulesProcessor[BlockRule]": - return self.find_blocks(labels, exact_match, max_depth)[0] - - def find_blocks( - self, - labels: List[str], - exact_match: bool = True, - max_depth: Optional[int] = None, - ) -> List["RulesProcessor[BlockRule]"]: - """ - Find blocks by their labels. - - Args: - labels: List of label strings to match - exact_match: If True, all labels must match exactly. If False, labels can be a subset. - max_depth: Maximum depth to search - - Returns: - ... - """ - - def block_predicate(node: LarkRule) -> bool: - if not isinstance(node, BlockRule): - return False - - node_labels = [label.serialize() for label in node.labels] - - if exact_match: - return node_labels == labels - else: - # Check if labels is a prefix of node_labels - if len(labels) > len(node_labels): - return False - return node_labels[: len(labels)] == labels - - return cast( - List[RulesProcessor[BlockRule]], - self._traverse(self.node, block_predicate, max_depth=max_depth), - ) - - def attribute( - self, name: str, max_depth: Optional[int] = None - ) -> "RulesProcessor[AttributeRule]": - return self.find_attributes(name, max_depth)[0] - - def find_attributes( - self, name: str, max_depth: Optional[int] = None - ) -> List["RulesProcessor[AttributeRule]"]: - """ - Find attributes by their identifier name. 
- - Args: - name: Attribute name to search for - max_depth: Maximum depth to search - - Returns: - List of TreePath objects for matching attributes - """ - - def attribute_predicate(node: LarkRule) -> bool: - if not isinstance(node, AttributeRule): - return False - return node.identifier.serialize() == name - - return self._traverse(self.node, attribute_predicate, max_depth=max_depth) - - def rule(self, rule_name: str, max_depth: Optional[int] = None): - return self.find_rules(rule_name, max_depth)[0] - - def find_rules( - self, rule_name: str, max_depth: Optional[int] = None - ) -> List["RulesProcessor"]: - """ - Find all rules of a specific type. - - Args: - rule_name: Name of the rule type to find - max_depth: Maximum depth to search - - Returns: - List of TreePath objects for matching rules - """ - - def rule_predicate(node: LarkRule) -> bool: - return node.lark_name() == rule_name - - return self._traverse(self.node, rule_predicate, max_depth=max_depth) - - def find_by_predicate( - self, predicate: Callable[[LarkRule], bool], max_depth: Optional[int] = None - ) -> List["RulesProcessor"]: - """ - Find all rules matching a custom predicate. 
- - Args: - predicate: Function that returns True for nodes to collect - max_depth: Maximum depth to search - - Returns: - List of TreePath objects for matching rules - """ - return self._traverse(self.node, predicate, max_depth) - - # Convenience methods - def get_all_blocks(self, max_depth: Optional[int] = None) -> List: - """Get all blocks in the tree.""" - return self.find_rules("block", max_depth) - - def get_all_attributes( - self, max_depth: Optional[int] = None - ) -> List["RulesProcessor"]: - """Get all attributes in the tree.""" - return self.find_rules("attribute", max_depth) - - def previous(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: - """Get the next sibling node.""" - if self.node.parent is None: - return None - - for sibling in reversed(self.previous_siblings): - if sibling is not None and isinstance(sibling, LarkRule): - if skip_new_line and isinstance(sibling, NewLineOrCommentRule): - continue - return self.__class__(sibling) - - def next(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: - """Get the next sibling node.""" - if self.node.parent is None: - return None - - for sibling in self.next_siblings: - if sibling is not None and isinstance(sibling, LarkRule): - if skip_new_line and isinstance(sibling, NewLineOrCommentRule): - continue - return self.__class__(sibling) - - def append_child( - self, new_node: LarkRule, indentation: bool = True - ) -> "RulesProcessor": - children = self.node.children - if indentation: - if isinstance(children[-1], NewLineOrCommentRule): - children.pop() - children.append(NewLineOrCommentRule.from_string("\n ")) - - new_node = deepcopy(new_node) - new_node.set_parent(self.node) - new_node.set_index(len(children)) - children.append(new_node) - return self.__class__(new_node) - - def replace(self, new_node: LarkRule) -> "RulesProcessor": - new_node = deepcopy(new_node) - - self.node.parent.children.pop(self.node.index) - self.node.parent.children.insert(self.node.index, new_node) - 
new_node.set_parent(self.node.parent) - new_node.set_index(self.node.index) - return self.__class__(new_node) - - # def insert_before(self, new_node: LarkRule) -> bool: - # """Insert a new node before this one.""" - # if self.parent is None or self.parent_index < 0: - # return False - # - # try: - # self.parent.children.insert(self.parent_index, new_node) - # except (IndexError, AttributeError): - # return False From 4054fc9627d70028f56124fc22c2c112ef4752f9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 19:30:21 +0100 Subject: [PATCH 20/42] add new unit tests, exclude some files from coverage report --- .coveragerc | 3 + test/helpers/__init__.py | 3 - test/helpers/hcl2_helper.py | 21 -- test/unit/__init__.py | 0 test/unit/rules/__init__.py | 0 test/unit/rules/test_abstract.py | 178 ++++++++++ test/unit/rules/test_containers.py | 396 +++++++++++++++++++++ test/unit/rules/test_expressions.py | 489 ++++++++++++++++++++++++++ test/unit/rules/test_literal_rules.py | 95 +++++ test/unit/rules/test_strings.py | 247 +++++++++++++ test/unit/rules/test_tokens.py | 162 +++++++++ test/unit/rules/test_whitespace.py | 135 +++++++ test/unit/test_utils.py | 148 ++++++++ 13 files changed, 1853 insertions(+), 24 deletions(-) delete mode 100644 test/helpers/__init__.py delete mode 100644 test/helpers/hcl2_helper.py create mode 100644 test/unit/__init__.py create mode 100644 test/unit/rules/__init__.py create mode 100644 test/unit/rules/test_abstract.py create mode 100644 test/unit/rules/test_containers.py create mode 100644 test/unit/rules/test_expressions.py create mode 100644 test/unit/rules/test_literal_rules.py create mode 100644 test/unit/rules/test_strings.py create mode 100644 test/unit/rules/test_tokens.py create mode 100644 test/unit/rules/test_whitespace.py create mode 100644 test/unit/test_utils.py diff --git a/.coveragerc b/.coveragerc index 4facabdc..30e6dc8c 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,6 +3,9 @@ branch = true omit = 
hcl2/__main__.py hcl2/lark_parser.py + hcl2/version.py + hcl2/__init__.py + hcl2/rules/__init__.py [report] show_missing = true diff --git a/test/helpers/__init__.py b/test/helpers/__init__.py deleted file mode 100644 index ba33e308..00000000 --- a/test/helpers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Helper functions for tests -""" diff --git a/test/helpers/hcl2_helper.py b/test/helpers/hcl2_helper.py deleted file mode 100644 index c39ee7fb..00000000 --- a/test/helpers/hcl2_helper.py +++ /dev/null @@ -1,21 +0,0 @@ -# pylint:disable=C0114,C0115,C0116 - -from lark import Tree - -from hcl2.parser import parser -from hcl2.dict_transformer import DictTransformer - - -class Hcl2Helper: - @classmethod - def load(cls, syntax: str) -> Tree: - return parser().parse(syntax) - - @classmethod - def load_to_dict(cls, syntax) -> dict: - tree = cls.load(syntax) - return DictTransformer().transform(tree) - - @classmethod - def build_argument(cls, identifier: str, expression: str = '"expression"') -> str: - return f"{identifier} = {expression}" diff --git a/test/unit/__init__.py b/test/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/rules/__init__.py b/test/unit/rules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/rules/test_abstract.py b/test/unit/rules/test_abstract.py new file mode 100644 index 00000000..8803effc --- /dev/null +++ b/test/unit/rules/test_abstract.py @@ -0,0 +1,178 @@ +from unittest import TestCase + +from lark import Token, Tree +from lark.tree import Meta + +from hcl2.rules.abstract import LarkElement, LarkToken, LarkRule +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Concrete stubs for testing ABCs --- + + +class ConcreteToken(LarkToken): + @staticmethod + def lark_name() -> str: + return "TEST_TOKEN" + + @property + def serialize_conversion(self): + return str + + +class IntToken(LarkToken): + @staticmethod + def lark_name() -> str: + return 
"INT_TOKEN" + + @property + def serialize_conversion(self): + return int + + +class ConcreteRule(LarkRule): + @staticmethod + def lark_name() -> str: + return "test_rule" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "test" + + +# --- Tests --- + + +class TestLarkToken(TestCase): + def test_init_stores_value(self): + token = ConcreteToken("hello") + self.assertEqual(token.value, "hello") + + def test_value_property(self): + token = ConcreteToken(42) + self.assertEqual(token.value, 42) + + def test_set_value(self): + token = ConcreteToken("old") + token.set_value("new") + self.assertEqual(token.value, "new") + + def test_str(self): + token = ConcreteToken("hello") + self.assertEqual(str(token), "hello") + + def test_str_numeric(self): + token = ConcreteToken(42) + self.assertEqual(str(token), "42") + + def test_repr(self): + token = ConcreteToken("hello") + self.assertEqual(repr(token), "") + + def test_to_lark_returns_token(self): + token = ConcreteToken("val") + lark_token = token.to_lark() + self.assertIsInstance(lark_token, Token) + self.assertEqual(lark_token.type, "TEST_TOKEN") + self.assertEqual(lark_token, "val") + + def test_serialize_uses_conversion(self): + token = ConcreteToken("hello") + self.assertEqual(token.serialize(), "hello") + + def test_serialize_int_conversion(self): + token = IntToken("42") + result = token.serialize() + self.assertEqual(result, 42) + self.assertIsInstance(result, int) + + def test_lark_name(self): + self.assertEqual(ConcreteToken.lark_name(), "TEST_TOKEN") + + +class TestLarkRule(TestCase): + def test_init_sets_children(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + self.assertEqual(rule.children, [t1, t2]) + + def test_init_sets_parent_and_index(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + self.assertIs(t1._parent, rule) + self.assertIs(t2._parent, rule) + 
self.assertEqual(t1._index, 0) + self.assertEqual(t2._index, 1) + + def test_init_skips_none_children_for_parent_index(self): + t1 = ConcreteToken("a") + rule = ConcreteRule([None, t1, None]) + self.assertIs(t1._parent, rule) + self.assertEqual(t1._index, 1) + + def test_init_with_meta(self): + meta = Meta() + rule = ConcreteRule([], meta) + self.assertIs(rule._meta, meta) + + def test_init_without_meta(self): + rule = ConcreteRule([]) + self.assertIsNotNone(rule._meta) + + def test_parent_property(self): + child_rule = ConcreteRule([]) + parent_rule = ConcreteRule([child_rule]) + self.assertIs(child_rule.parent, parent_rule) + + def test_index_property(self): + child_rule = ConcreteRule([]) + ConcreteRule([child_rule]) + self.assertEqual(child_rule.index, 0) + + def test_children_property(self): + t = ConcreteToken("x") + rule = ConcreteRule([t]) + self.assertEqual(rule.children, [t]) + + def test_to_lark_builds_tree(self): + t1 = ConcreteToken("a") + t2 = ConcreteToken("b") + rule = ConcreteRule([t1, t2]) + tree = rule.to_lark() + self.assertIsInstance(tree, Tree) + self.assertEqual(tree.data, "test_rule") + self.assertEqual(len(tree.children), 2) + + def test_to_lark_skips_none_children(self): + t1 = ConcreteToken("a") + rule = ConcreteRule([None, t1, None]) + tree = rule.to_lark() + self.assertEqual(len(tree.children), 1) + self.assertEqual(tree.children[0], "a") + + def test_repr(self): + rule = ConcreteRule([]) + self.assertEqual(repr(rule), "") + + def test_nested_rules(self): + inner = ConcreteRule([ConcreteToken("x")]) + outer = ConcreteRule([inner]) + self.assertIs(inner.parent, outer) + tree = outer.to_lark() + self.assertEqual(tree.data, "test_rule") + self.assertEqual(len(tree.children), 1) + self.assertIsInstance(tree.children[0], Tree) + + +class TestLarkElement(TestCase): + def test_set_index(self): + token = ConcreteToken("x") + token.set_index(5) + self.assertEqual(token._index, 5) + + def test_set_parent(self): + token = ConcreteToken("x") + 
parent = ConcreteRule([]) + token.set_parent(parent) + self.assertIs(token._parent, parent) diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py new file mode 100644 index 00000000..b49b3f38 --- /dev/null +++ b/test/unit/rules/test_containers.py @@ -0,0 +1,396 @@ +from unittest import TestCase + +from hcl2.rules.containers import ( + TupleRule, + ObjectElemKeyRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, + ObjectElemRule, + ObjectRule, +) +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.literal_rules import IdentifierRule, IntLitRule, FloatLitRule +from hcl2.rules.strings import StringRule, StringPartRule +from hcl2.rules.tokens import ( + LSQB, + RSQB, + LBRACE, + RBRACE, + LPAR, + RPAR, + DOT, + EQ, + COLON, + COMMA, + NAME, + DBLQUOTE, + STRING_CHARS, + IntLiteral, + FloatLiteral, +) +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.rules.tokens import NL_OR_COMMENT +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & Helpers --- + + +class StubExpression(ExpressionRule): + """Minimal ExpressionRule that serializes to a fixed value.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_nlc(text): + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +def _make_identifier(name): + return IdentifierRule([NAME(name)]) + + +def _make_string_rule(text): + part = StringPartRule([STRING_CHARS(text)]) + return StringRule([DBLQUOTE(), part, DBLQUOTE()]) + + +def _make_object_elem_key(identifier_name): + return ObjectElemKeyRule([_make_identifier(identifier_name)]) + + +def _make_object_elem(key_name, expr_value, sep=None): + key = _make_object_elem_key(key_name) + separator = sep or EQ() + return ObjectElemRule([key, separator, StubExpression(expr_value)]) + + +# --- TupleRule tests --- 
+ + +class TestTupleRule(TestCase): + def test_lark_name(self): + self.assertEqual(TupleRule.lark_name(), "tuple") + + def test_elements_empty_tuple(self): + rule = TupleRule([LSQB(), RSQB()]) + self.assertEqual(rule.elements, []) + + def test_elements_single(self): + expr = StubExpression(1) + rule = TupleRule([LSQB(), expr, RSQB()]) + self.assertEqual(rule.elements, [expr]) + + def test_elements_multiple(self): + e1 = StubExpression(1) + e2 = StubExpression(2) + e3 = StubExpression(3) + rule = TupleRule([LSQB(), e1, COMMA(), e2, COMMA(), e3, RSQB()]) + self.assertEqual(rule.elements, [e1, e2, e3]) + + def test_elements_skips_non_expressions(self): + e1 = StubExpression(1) + e2 = StubExpression(2) + nlc = _make_nlc("\n") + rule = TupleRule([LSQB(), nlc, e1, COMMA(), nlc, e2, RSQB()]) + self.assertEqual(len(rule.elements), 2) + + def test_serialize_default_returns_list(self): + rule = TupleRule( + [LSQB(), StubExpression(1), COMMA(), StubExpression(2), RSQB()] + ) + result = rule.serialize() + self.assertEqual(result, [1, 2]) + + def test_serialize_empty_returns_empty_list(self): + rule = TupleRule([LSQB(), RSQB()]) + self.assertEqual(rule.serialize(), []) + + def test_serialize_single_element(self): + rule = TupleRule([LSQB(), StubExpression(42), RSQB()]) + self.assertEqual(rule.serialize(), [42]) + + def test_serialize_wrap_tuples(self): + rule = TupleRule( + [LSQB(), StubExpression("a"), COMMA(), StubExpression("b"), RSQB()] + ) + opts = SerializationOptions(wrap_tuples=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${[a, b]}") + + def test_serialize_wrap_tuples_empty(self): + rule = TupleRule([LSQB(), RSQB()]) + opts = SerializationOptions(wrap_tuples=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${[]}") + + def test_serialize_inside_dollar_string(self): + rule = TupleRule([LSQB(), StubExpression("a"), RSQB()]) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + 
# Inside dollar string forces string representation + self.assertEqual(result, "[a]") + + def test_serialize_inside_dollar_string_no_extra_wrap(self): + rule = TupleRule( + [LSQB(), StubExpression("a"), COMMA(), StubExpression("b"), RSQB()] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "[a, b]") + + def test_serialize_wrap_tuples_inside_dollar_string(self): + rule = TupleRule([LSQB(), StubExpression("x"), RSQB()]) + opts = SerializationOptions(wrap_tuples=True) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + # Already inside $, so no extra wrapping + self.assertEqual(result, "[x]") + + +# --- ObjectElemKeyRule tests --- + + +class TestObjectElemKeyRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectElemKeyRule.lark_name(), "object_elem_key") + + def test_value_property_identifier(self): + ident = _make_identifier("foo") + rule = ObjectElemKeyRule([ident]) + self.assertIs(rule.value, ident) + + def test_serialize_identifier(self): + rule = ObjectElemKeyRule([_make_identifier("my_key")]) + self.assertEqual(rule.serialize(), "my_key") + + def test_serialize_int_lit(self): + rule = ObjectElemKeyRule([IntLitRule([IntLiteral("5")])]) + self.assertEqual(rule.serialize(), 5) + + def test_serialize_float_lit(self): + rule = ObjectElemKeyRule([FloatLitRule([FloatLiteral("3.14")])]) + self.assertAlmostEqual(rule.serialize(), 3.14) + + def test_serialize_string(self): + rule = ObjectElemKeyRule([_make_string_rule("k3")]) + self.assertEqual(rule.serialize(), '"k3"') + + +# --- ObjectElemKeyExpressionRule tests --- + + +class TestObjectElemKeyExpressionRule(TestCase): + def test_lark_name(self): + self.assertEqual( + ObjectElemKeyExpressionRule.lark_name(), "object_elem_key_expression" + ) + + def test_expression_property(self): + expr = StubExpression("5 + 5") + rule = ObjectElemKeyExpressionRule([LPAR(), expr, RPAR()]) 
+ self.assertIs(rule.expression, expr) + + def test_serialize(self): + rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + result = rule.serialize() + self.assertEqual(result, "${(5 + 5)}") + + def test_serialize_inside_dollar_string(self): + rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "(5 + 5)") + + +# --- ObjectElemKeyDotAccessor tests --- + + +class TestObjectElemKeyDotAccessor(TestCase): + def test_lark_name(self): + self.assertEqual( + ObjectElemKeyDotAccessor.lark_name(), "object_elem_key_dot_accessor" + ) + + def test_identifiers_property(self): + i1 = _make_identifier("k5") + i2 = _make_identifier("attr") + i3 = _make_identifier("sub") + rule = ObjectElemKeyDotAccessor([i1, DOT(), i2, DOT(), i3]) + idents = rule.identifiers + self.assertEqual(len(idents), 3) + self.assertIs(idents[0], i1) + self.assertIs(idents[1], i2) + self.assertIs(idents[2], i3) + + def test_identifiers_two_segments(self): + i1 = _make_identifier("a") + i2 = _make_identifier("b") + rule = ObjectElemKeyDotAccessor([i1, DOT(), i2]) + self.assertEqual(len(rule.identifiers), 2) + + def test_serialize(self): + rule = ObjectElemKeyDotAccessor( + [ + _make_identifier("k5"), + DOT(), + _make_identifier("attr"), + DOT(), + _make_identifier("sub"), + ] + ) + self.assertEqual(rule.serialize(), "k5.attr.sub") + + def test_serialize_two_segments(self): + rule = ObjectElemKeyDotAccessor( + [_make_identifier("a"), DOT(), _make_identifier("b")] + ) + self.assertEqual(rule.serialize(), "a.b") + + +# --- ObjectElemRule tests --- + + +class TestObjectElemRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectElemRule.lark_name(), "object_elem") + + def test_key_property(self): + key = _make_object_elem_key("foo") + rule = ObjectElemRule([key, EQ(), StubExpression("bar")]) + self.assertIs(rule.key, key) + + def 
test_expression_property(self): + expr = StubExpression("bar") + rule = ObjectElemRule([_make_object_elem_key("foo"), EQ(), expr]) + self.assertIs(rule.expression, expr) + + def test_serialize_with_eq(self): + rule = _make_object_elem("name", "value") + self.assertEqual(rule.serialize(), {"name": "value"}) + + def test_serialize_with_colon(self): + rule = ObjectElemRule([_make_object_elem_key("k"), COLON(), StubExpression(42)]) + self.assertEqual(rule.serialize(), {"k": 42}) + + def test_serialize_int_value(self): + rule = _make_object_elem("count", 5) + self.assertEqual(rule.serialize(), {"count": 5}) + + def test_serialize_string_key(self): + key = ObjectElemKeyRule([_make_string_rule("quoted")]) + rule = ObjectElemRule([key, EQ(), StubExpression("val")]) + self.assertEqual(rule.serialize(), {'"quoted"': "val"}) + + +# --- ObjectRule tests --- + + +class TestObjectRule(TestCase): + def test_lark_name(self): + self.assertEqual(ObjectRule.lark_name(), "object") + + def test_elements_empty(self): + rule = ObjectRule([LBRACE(), RBRACE()]) + self.assertEqual(rule.elements, []) + + def test_elements_single(self): + elem = _make_object_elem("k", "v") + rule = ObjectRule([LBRACE(), elem, RBRACE()]) + self.assertEqual(rule.elements, [elem]) + + def test_elements_multiple(self): + e1 = _make_object_elem("a", 1) + e2 = _make_object_elem("b", 2) + rule = ObjectRule([LBRACE(), e1, e2, RBRACE()]) + self.assertEqual(rule.elements, [e1, e2]) + + def test_elements_skips_non_elem(self): + e1 = _make_object_elem("a", 1) + nlc = _make_nlc("\n") + rule = ObjectRule([LBRACE(), nlc, e1, nlc, RBRACE()]) + self.assertEqual(rule.elements, [e1]) + + def test_serialize_default_returns_dict(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k1", "v1"), + _make_object_elem("k2", "v2"), + RBRACE(), + ] + ) + result = rule.serialize() + self.assertEqual(result, {"k1": "v1", "k2": "v2"}) + + def test_serialize_empty_returns_empty_dict(self): + rule = ObjectRule([LBRACE(), 
RBRACE()]) + self.assertEqual(rule.serialize(), {}) + + def test_serialize_single_element(self): + rule = ObjectRule([LBRACE(), _make_object_elem("x", 42), RBRACE()]) + self.assertEqual(rule.serialize(), {"x": 42}) + + def test_serialize_wrap_objects(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k1", "v1"), + _make_object_elem("k2", "v2"), + RBRACE(), + ] + ) + opts = SerializationOptions(wrap_objects=True) + result = rule.serialize(options=opts) + # Result is "{k1 = v1, k2 = v2}" wrapped in ${}, giving ${{...}} + self.assertEqual(result, "${{k1 = v1, k2 = v2}}") + + def test_serialize_wrap_objects_empty(self): + rule = ObjectRule([LBRACE(), RBRACE()]) + opts = SerializationOptions(wrap_objects=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${{}}") + + def test_serialize_inside_dollar_string(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k", "v"), + RBRACE(), + ] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + # Inside dollar string forces string representation + self.assertEqual(result, "{k = v}") + + def test_serialize_inside_dollar_string_no_extra_wrap(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("a", 1), + _make_object_elem("b", 2), + RBRACE(), + ] + ) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "{a = 1, b = 2}") + + def test_serialize_wrap_objects_inside_dollar_string(self): + rule = ObjectRule( + [ + LBRACE(), + _make_object_elem("k", "v"), + RBRACE(), + ] + ) + opts = SerializationOptions(wrap_objects=True) + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + self.assertEqual(result, "{k = v}") diff --git a/test/unit/rules/test_expressions.py b/test/unit/rules/test_expressions.py new file mode 100644 index 00000000..16800ed0 --- /dev/null +++ b/test/unit/rules/test_expressions.py @@ -0,0 
+1,489 @@ +from unittest import TestCase + +from hcl2.rules.abstract import LarkRule +from hcl2.rules.expressions import ( + ExpressionRule, + ExprTermRule, + ConditionalRule, + BinaryTermRule, + BinaryOpRule, + UnaryOpRule, +) +from hcl2.rules.literal_rules import BinaryOperatorRule, IdentifierRule +from hcl2.rules.tokens import ( + LPAR, + RPAR, + QMARK, + COLON, + BINARY_OP, + NAME, + StringToken, +) +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & helpers --- + + +class StubExpression(ExpressionRule): + """Minimal concrete ExpressionRule that serializes to a fixed string.""" + + def __init__(self, value, children=None): + self._stub_value = value + super().__init__(children or [], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +class NonExpressionRule(LarkRule): + """A rule that is NOT an ExpressionRule, for parent-chain tests.""" + + @staticmethod + def lark_name(): + return "non_expression" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "non_expr" + + +def _make_expr_term(value): + """Build ExprTermRule wrapping a StubExpression (no parens).""" + return ExprTermRule([StubExpression(value)]) + + +def _make_paren_expr_term(value): + """Build ExprTermRule wrapping a StubExpression in parentheses.""" + return ExprTermRule([LPAR(), StubExpression(value), RPAR()]) + + +def _make_binary_operator(op_str): + """Build BinaryOperatorRule for an operator string.""" + return BinaryOperatorRule([BINARY_OP(op_str)]) + + +def _make_binary_term(op_str, rhs_value): + """Build BinaryTermRule with given operator and RHS value.""" + return BinaryTermRule([_make_binary_operator(op_str), _make_expr_term(rhs_value)]) + + +MINUS_TOKEN = StringToken["MINUS"] +NOT_TOKEN = StringToken["NOT"] + + +# --- ExprTermRule tests --- + + +class TestExprTermRule(TestCase): + def test_lark_name(self): + 
self.assertEqual(ExprTermRule.lark_name(), "expr_term") + + def test_construction_without_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + self.assertFalse(rule.parentheses) + + def test_construction_without_parens_children_structure(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + # children: [None, None, stub, None, None] + self.assertEqual(len(rule.children), 5) + self.assertIsNone(rule.children[0]) + self.assertIsNone(rule.children[1]) + self.assertIs(rule.children[2], stub) + self.assertIsNone(rule.children[3]) + self.assertIsNone(rule.children[4]) + + def test_construction_with_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([LPAR(), stub, RPAR()]) + self.assertTrue(rule.parentheses) + + def test_construction_with_parens_children_structure(self): + stub = StubExpression("a") + lpar = LPAR() + rpar = RPAR() + rule = ExprTermRule([lpar, stub, rpar]) + # children: [LPAR, None, stub, None, RPAR] + self.assertEqual(len(rule.children), 5) + self.assertIs(rule.children[0], lpar) + self.assertIsNone(rule.children[1]) + self.assertIs(rule.children[2], stub) + self.assertIsNone(rule.children[3]) + self.assertIs(rule.children[4], rpar) + + def test_expression_property(self): + stub = StubExpression("a") + rule = ExprTermRule([stub]) + self.assertIs(rule.expression, stub) + + def test_expression_property_with_parens(self): + stub = StubExpression("a") + rule = ExprTermRule([LPAR(), stub, RPAR()]) + self.assertIs(rule.expression, stub) + + def test_serialize_no_parens_delegates_to_inner(self): + rule = _make_expr_term("hello") + self.assertEqual(rule.serialize(), "hello") + + def test_serialize_no_parens_passes_through_int(self): + stub = StubExpression(42) + rule = ExprTermRule([stub]) + self.assertEqual(rule.serialize(), 42) + + def test_serialize_with_parens_wraps_and_dollar(self): + rule = _make_paren_expr_term("a") + result = rule.serialize() + self.assertEqual(result, "${(a)}") + + def 
test_serialize_with_parens_inside_dollar_string(self): + rule = _make_paren_expr_term("a") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + # Inside dollar string: wraps in () but NOT in ${} + self.assertEqual(result, "(a)") + + def test_serialize_sets_inside_parentheses_context(self): + """When parenthesized, inner expression should see inside_parentheses=True.""" + seen_context = {} + + class ContextCapture(ExpressionRule): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ): + seen_context["inside_parentheses"] = context.inside_parentheses + return "x" + + rule = ExprTermRule([LPAR(), ContextCapture([]), RPAR()]) + rule.serialize() + self.assertTrue(seen_context["inside_parentheses"]) + + def test_serialize_no_parens_preserves_inside_parentheses(self): + """Without parens, inside_parentheses passes through from caller context.""" + seen_context = {} + + class ContextCapture(ExpressionRule): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ): + seen_context["inside_parentheses"] = context.inside_parentheses + return "x" + + rule = ExprTermRule([ContextCapture([])]) + rule.serialize(context=SerializationContext(inside_parentheses=False)) + self.assertFalse(seen_context["inside_parentheses"]) + + +# --- ConditionalRule tests --- + + +class TestConditionalRule(TestCase): + def _make_conditional(self, cond_val="cond", true_val="yes", false_val="no"): + return ConditionalRule( + [ + StubExpression(cond_val), + QMARK(), + StubExpression(true_val), + COLON(), + StubExpression(false_val), + ] + ) + + def test_lark_name(self): + self.assertEqual(ConditionalRule.lark_name(), "conditional") + + def test_construction_inserts_optional_slots(self): + rule = self._make_conditional() + # Should have 8 children after _insert_optionals at [2, 4, 6] + self.assertEqual(len(rule.children), 8) + + def test_condition_property(self): + cond = 
StubExpression("cond") + rule = ConditionalRule( + [cond, QMARK(), StubExpression("t"), COLON(), StubExpression("f")] + ) + self.assertIs(rule.condition, cond) + + def test_if_true_property(self): + true_expr = StubExpression("yes") + rule = ConditionalRule( + [ + StubExpression("c"), + QMARK(), + true_expr, + COLON(), + StubExpression("f"), + ] + ) + self.assertIs(rule.if_true, true_expr) + + def test_if_false_property(self): + false_expr = StubExpression("no") + rule = ConditionalRule( + [ + StubExpression("c"), + QMARK(), + StubExpression("t"), + COLON(), + false_expr, + ] + ) + self.assertIs(rule.if_false, false_expr) + + def test_serialize_format(self): + rule = self._make_conditional("a", "b", "c") + result = rule.serialize() + self.assertEqual(result, "${a ? b : c}") + + def test_serialize_wraps_in_dollar_string(self): + rule = self._make_conditional("x", "y", "z") + result = rule.serialize() + self.assertTrue(result.startswith("${")) + self.assertTrue(result.endswith("}")) + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_conditional("x", "y", "z") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "x ? y : z") + + def test_serialize_force_parens_no_parent(self): + """force_operation_parentheses with no parent → no wrapping.""" + rule = self._make_conditional("a", "b", "c") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + # No parent, so _wrap_into_parentheses returns unchanged + self.assertEqual(result, "${a ? 
b : c}") + + def test_serialize_force_parens_with_expression_parent(self): + """force_operation_parentheses with ExpressionRule parent → wraps.""" + rule = self._make_conditional("a", "b", "c") + # Nest inside another expression to set parent + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(a ? b : c)}") + + +# --- BinaryTermRule tests --- + + +class TestBinaryTermRule(TestCase): + def test_lark_name(self): + self.assertEqual(BinaryTermRule.lark_name(), "binary_term") + + def test_construction_inserts_optional(self): + rule = _make_binary_term("+", "b") + # [BinaryOperatorRule, None, ExprTermRule] + self.assertEqual(len(rule.children), 3) + self.assertIsNone(rule.children[1]) + + def test_binary_operator_property(self): + op = _make_binary_operator("+") + rhs = _make_expr_term("b") + rule = BinaryTermRule([op, rhs]) + self.assertIs(rule.binary_operator, op) + + def test_expr_term_property(self): + op = _make_binary_operator("+") + rhs = _make_expr_term("b") + rule = BinaryTermRule([op, rhs]) + self.assertIs(rule.expr_term, rhs) + + def test_serialize(self): + rule = _make_binary_term("+", "b") + result = rule.serialize() + self.assertEqual(result, "+ b") + + def test_serialize_equals_operator(self): + rule = _make_binary_term("==", "x") + self.assertEqual(rule.serialize(), "== x") + + def test_serialize_and_operator(self): + rule = _make_binary_term("&&", "y") + self.assertEqual(rule.serialize(), "&& y") + + +# --- BinaryOpRule tests --- + + +class TestBinaryOpRule(TestCase): + def _make_binary_op(self, lhs_val, op_str, rhs_val): + lhs = _make_expr_term(lhs_val) + bt = _make_binary_term(op_str, rhs_val) + return BinaryOpRule([lhs, bt, None]) + + def test_lark_name(self): + self.assertEqual(BinaryOpRule.lark_name(), "binary_op") + + def test_expr_term_property(self): + lhs = _make_expr_term("a") + bt = _make_binary_term("+", "b") + rule = 
BinaryOpRule([lhs, bt, None]) + self.assertIs(rule.expr_term, lhs) + + def test_binary_term_property(self): + lhs = _make_expr_term("a") + bt = _make_binary_term("+", "b") + rule = BinaryOpRule([lhs, bt, None]) + self.assertIs(rule.binary_term, bt) + + def test_serialize_addition(self): + rule = self._make_binary_op("a", "+", "b") + self.assertEqual(rule.serialize(), "${a + b}") + + def test_serialize_equality(self): + rule = self._make_binary_op("x", "==", "y") + self.assertEqual(rule.serialize(), "${x == y}") + + def test_serialize_and(self): + rule = self._make_binary_op("p", "&&", "q") + self.assertEqual(rule.serialize(), "${p && q}") + + def test_serialize_multiply(self): + rule = self._make_binary_op("a", "*", "b") + self.assertEqual(rule.serialize(), "${a * b}") + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_binary_op("a", "+", "b") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "a + b") + + def test_serialize_force_parens_no_parent(self): + """No parent → _wrap_into_parentheses returns unchanged.""" + rule = self._make_binary_op("a", "+", "b") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${a + b}") + + def test_serialize_force_parens_with_expression_parent(self): + """With ExpressionRule parent → wraps in parens.""" + rule = self._make_binary_op("a", "+", "b") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(a + b)}") + + def test_serialize_force_parens_inside_dollar_string_with_parent(self): + """Inside dollar string + parent → parens without extra ${}.""" + rule = self._make_binary_op("a", "+", "b") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + ctx = 
SerializationContext(inside_dollar_string=True) + result = rule.serialize(options=opts, context=ctx) + self.assertEqual(result, "(a + b)") + + +# --- UnaryOpRule tests --- + + +class TestUnaryOpRule(TestCase): + def _make_unary(self, op_str, operand_val): + token_cls = MINUS_TOKEN if op_str == "-" else NOT_TOKEN + token = token_cls(op_str) + expr_term = _make_expr_term(operand_val) + return UnaryOpRule([token, expr_term]) + + def test_lark_name(self): + self.assertEqual(UnaryOpRule.lark_name(), "unary_op") + + def test_operator_property_minus(self): + rule = self._make_unary("-", "x") + self.assertEqual(rule.operator, "-") + + def test_operator_property_not(self): + rule = self._make_unary("!", "x") + self.assertEqual(rule.operator, "!") + + def test_expr_term_property(self): + expr_term = _make_expr_term("x") + token = MINUS_TOKEN("-") + rule = UnaryOpRule([token, expr_term]) + self.assertIs(rule.expr_term, expr_term) + + def test_serialize_minus(self): + rule = self._make_unary("-", "a") + self.assertEqual(rule.serialize(), "${-a}") + + def test_serialize_not(self): + rule = self._make_unary("!", "flag") + self.assertEqual(rule.serialize(), "${!flag}") + + def test_serialize_no_double_wrap_inside_dollar_string(self): + rule = self._make_unary("-", "x") + ctx = SerializationContext(inside_dollar_string=True) + result = rule.serialize(context=ctx) + self.assertEqual(result, "-x") + + def test_serialize_force_parens_no_parent(self): + rule = self._make_unary("-", "x") + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${-x}") + + def test_serialize_force_parens_with_expression_parent(self): + rule = self._make_unary("-", "x") + StubExpression("outer", children=[rule]) + opts = SerializationOptions(force_operation_parentheses=True) + result = rule.serialize(options=opts) + self.assertEqual(result, "${(-x)}") + + +# --- ExpressionRule._wrap_into_parentheses tests --- + + +class 
TestWrapIntoParenthesesMethod(TestCase): + def test_returns_unchanged_when_inside_parentheses(self): + expr = StubExpression("test") + ctx = SerializationContext(inside_parentheses=True) + result = expr._wrap_into_parentheses("${x}", context=ctx) + self.assertEqual(result, "${x}") + + def test_returns_unchanged_when_no_parent(self): + expr = StubExpression("test") + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_returns_unchanged_when_parent_not_expression(self): + expr = StubExpression("test") + NonExpressionRule([expr]) + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_wraps_when_parent_is_expression(self): + expr = StubExpression("test") + StubExpression("outer", children=[expr]) + result = expr._wrap_into_parentheses("${x}") + self.assertEqual(result, "${(x)}") + + def test_wraps_plain_string_when_parent_is_expression(self): + expr = StubExpression("test") + StubExpression("outer", children=[expr]) + result = expr._wrap_into_parentheses("a + b") + self.assertEqual(result, "(a + b)") + + def test_expr_term_parent_with_expression_grandparent(self): + """Parent is ExprTermRule, grandparent is ExpressionRule → wraps.""" + inner = StubExpression("test") + expr_term = ExprTermRule([inner]) + # inner is now at expr_term._children[2], parent=expr_term + StubExpression("grandparent", children=[expr_term]) + # expr_term.parent = grandparent (ExpressionRule) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${(x)}") + + def test_expr_term_parent_with_non_expression_grandparent(self): + """Parent is ExprTermRule, grandparent is NOT ExpressionRule → no wrap.""" + inner = StubExpression("test") + expr_term = ExprTermRule([inner]) + NonExpressionRule([expr_term]) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") + + def test_expr_term_parent_with_no_grandparent(self): + """Parent is ExprTermRule with no parent → no wrap.""" + 
inner = StubExpression("test") + ExprTermRule([inner]) + result = inner._wrap_into_parentheses("${x}") + self.assertEqual(result, "${x}") diff --git a/test/unit/rules/test_literal_rules.py b/test/unit/rules/test_literal_rules.py new file mode 100644 index 00000000..f6b8b94c --- /dev/null +++ b/test/unit/rules/test_literal_rules.py @@ -0,0 +1,95 @@ +from unittest import TestCase + +from hcl2.rules.literal_rules import ( + TokenRule, + KeywordRule, + IdentifierRule, + IntLitRule, + FloatLitRule, + BinaryOperatorRule, +) +from hcl2.rules.tokens import NAME, BINARY_OP, IntLiteral, FloatLiteral + + +class TestKeywordRule(TestCase): + def test_lark_name(self): + self.assertEqual(KeywordRule.lark_name(), "keyword") + + def test_token_property(self): + token = NAME("true") + rule = KeywordRule([token]) + self.assertIs(rule.token, token) + + def test_serialize(self): + rule = KeywordRule([NAME("true")]) + self.assertEqual(rule.serialize(), "true") + + +class TestIdentifierRule(TestCase): + def test_lark_name(self): + self.assertEqual(IdentifierRule.lark_name(), "identifier") + + def test_serialize(self): + rule = IdentifierRule([NAME("my_var")]) + self.assertEqual(rule.serialize(), "my_var") + + def test_token_property(self): + token = NAME("foo") + rule = IdentifierRule([token]) + self.assertIs(rule.token, token) + + +class TestIntLitRule(TestCase): + def test_lark_name(self): + self.assertEqual(IntLitRule.lark_name(), "int_lit") + + def test_serialize_returns_int(self): + rule = IntLitRule([IntLiteral("42")]) + result = rule.serialize() + self.assertEqual(result, 42) + self.assertIsInstance(result, int) + + +class TestFloatLitRule(TestCase): + def test_lark_name(self): + self.assertEqual(FloatLitRule.lark_name(), "float_lit") + + def test_serialize_returns_float(self): + rule = FloatLitRule([FloatLiteral("3.14")]) + result = rule.serialize() + self.assertAlmostEqual(result, 3.14) + self.assertIsInstance(result, float) + + +class TestBinaryOperatorRule(TestCase): + def 
test_lark_name(self): + self.assertEqual(BinaryOperatorRule.lark_name(), "binary_operator") + + def test_serialize_plus(self): + rule = BinaryOperatorRule([BINARY_OP("+")]) + self.assertEqual(rule.serialize(), "+") + + def test_serialize_equals(self): + rule = BinaryOperatorRule([BINARY_OP("==")]) + self.assertEqual(rule.serialize(), "==") + + def test_serialize_and(self): + rule = BinaryOperatorRule([BINARY_OP("&&")]) + self.assertEqual(rule.serialize(), "&&") + + def test_serialize_or(self): + rule = BinaryOperatorRule([BINARY_OP("||")]) + self.assertEqual(rule.serialize(), "||") + + def test_serialize_gt(self): + rule = BinaryOperatorRule([BINARY_OP(">")]) + self.assertEqual(rule.serialize(), ">") + + def test_serialize_multiply(self): + rule = BinaryOperatorRule([BINARY_OP("*")]) + self.assertEqual(rule.serialize(), "*") + + def test_token_property(self): + token = BINARY_OP("+") + rule = BinaryOperatorRule([token]) + self.assertIs(rule.token, token) diff --git a/test/unit/rules/test_strings.py b/test/unit/rules/test_strings.py new file mode 100644 index 00000000..67fec075 --- /dev/null +++ b/test/unit/rules/test_strings.py @@ -0,0 +1,247 @@ +from unittest import TestCase + +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.strings import ( + InterpolationRule, + StringPartRule, + StringRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, +) +from hcl2.rules.tokens import ( + INTERP_START, + RBRACE, + DBLQUOTE, + STRING_CHARS, + ESCAPED_INTERPOLATION, + HEREDOC_TEMPLATE, + HEREDOC_TRIM_TEMPLATE, +) +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs --- + + +class StubExpression(ExpressionRule): + """Minimal ExpressionRule that serializes to a fixed string.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +# --- Helpers --- + + +def 
_make_string_part_chars(text): + return StringPartRule([STRING_CHARS(text)]) + + +def _make_string_part_escaped(text): + return StringPartRule([ESCAPED_INTERPOLATION(text)]) + + +def _make_string_part_interpolation(expr_value): + interp = InterpolationRule([INTERP_START(), StubExpression(expr_value), RBRACE()]) + return StringPartRule([interp]) + + +def _make_string(parts): + """Build StringRule from a list of StringPartRule children.""" + return StringRule([DBLQUOTE(), *parts, DBLQUOTE()]) + + +# --- InterpolationRule tests --- + + +class TestInterpolationRule(TestCase): + def test_lark_name(self): + self.assertEqual(InterpolationRule.lark_name(), "interpolation") + + def test_expression_property(self): + expr = StubExpression("var.name") + rule = InterpolationRule([INTERP_START(), expr, RBRACE()]) + self.assertIs(rule.expression, expr) + + def test_serialize_wraps_in_dollar_string(self): + rule = InterpolationRule([INTERP_START(), StubExpression("var.name"), RBRACE()]) + self.assertEqual(rule.serialize(), "${var.name}") + + def test_serialize_idempotent_if_already_dollar(self): + rule = InterpolationRule([INTERP_START(), StubExpression("${x}"), RBRACE()]) + self.assertEqual(rule.serialize(), "${x}") + + def test_serialize_expression_result(self): + rule = InterpolationRule([INTERP_START(), StubExpression("a + b"), RBRACE()]) + self.assertEqual(rule.serialize(), "${a + b}") + + +# --- StringPartRule tests --- + + +class TestStringPartRule(TestCase): + def test_lark_name(self): + self.assertEqual(StringPartRule.lark_name(), "string_part") + + def test_content_property_string_chars(self): + token = STRING_CHARS("hello") + rule = StringPartRule([token]) + self.assertIs(rule.content, token) + + def test_serialize_string_chars(self): + rule = _make_string_part_chars("hello world") + self.assertEqual(rule.serialize(), "hello world") + + def test_serialize_escaped_interpolation(self): + rule = _make_string_part_escaped("$${aws:username}") + 
self.assertEqual(rule.serialize(), "$${aws:username}") + + def test_serialize_interpolation(self): + rule = _make_string_part_interpolation("var.name") + self.assertEqual(rule.serialize(), "${var.name}") + + def test_content_property_interpolation(self): + interp = InterpolationRule([INTERP_START(), StubExpression("x"), RBRACE()]) + rule = StringPartRule([interp]) + self.assertIs(rule.content, interp) + + +# --- StringRule tests --- + + +class TestStringRule(TestCase): + def test_lark_name(self): + self.assertEqual(StringRule.lark_name(), "string") + + def test_string_parts_property(self): + p1 = _make_string_part_chars("hello") + p2 = _make_string_part_chars(" world") + rule = _make_string([p1, p2]) + self.assertEqual(rule.string_parts, [p1, p2]) + + def test_string_parts_empty(self): + rule = _make_string([]) + self.assertEqual(rule.string_parts, []) + + def test_serialize_plain_string(self): + rule = _make_string([_make_string_part_chars("hello")]) + self.assertEqual(rule.serialize(), '"hello"') + + def test_serialize_empty_string(self): + rule = _make_string([]) + self.assertEqual(rule.serialize(), '""') + + def test_serialize_concatenated_parts(self): + rule = _make_string( + [ + _make_string_part_chars("prefix:"), + _make_string_part_interpolation("var.name"), + _make_string_part_chars("-suffix"), + ] + ) + self.assertEqual(rule.serialize(), '"prefix:${var.name}-suffix"') + + def test_serialize_escaped_and_interpolation(self): + rule = _make_string( + [ + _make_string_part_interpolation("bar"), + _make_string_part_escaped("$${baz:bat}"), + ] + ) + self.assertEqual(rule.serialize(), '"${bar}$${baz:bat}"') + + def test_serialize_only_interpolation(self): + rule = _make_string([_make_string_part_interpolation("x")]) + self.assertEqual(rule.serialize(), '"${x}"') + + +# --- HeredocTemplateRule tests --- + + +class TestHeredocTemplateRule(TestCase): + def test_lark_name(self): + self.assertEqual(HeredocTemplateRule.lark_name(), "heredoc_template") + + def 
test_heredoc_property(self): + token = HEREDOC_TEMPLATE("< str: + return "test_inline" + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return "test" + + +def _make_nlc(text): + """Helper: build NewLineOrCommentRule from a string.""" + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +# --- Tests --- + + +class TestNewLineOrCommentRule(TestCase): + def test_lark_name(self): + self.assertEqual(NewLineOrCommentRule.lark_name(), "new_line_or_comment") + + def test_serialize_newline(self): + rule = _make_nlc("\n") + self.assertEqual(rule.serialize(), "\n") + + def test_serialize_line_comment(self): + rule = _make_nlc("// this is a comment\n") + self.assertEqual(rule.serialize(), "// this is a comment\n") + + def test_serialize_hash_comment(self): + rule = _make_nlc("# hash comment\n") + self.assertEqual(rule.serialize(), "# hash comment\n") + + def test_to_list_bare_newline_returns_none(self): + rule = _make_nlc("\n") + self.assertIsNone(rule.to_list()) + + def test_to_list_line_comment(self): + rule = _make_nlc("// my comment\n") + result = rule.to_list() + self.assertEqual(result, ["my comment"]) + + def test_to_list_hash_comment(self): + rule = _make_nlc("# my comment\n") + result = rule.to_list() + self.assertEqual(result, ["my comment"]) + + def test_to_list_block_comment(self): + rule = _make_nlc("/* block comment */\n") + result = rule.to_list() + self.assertEqual(result, ["block comment"]) + + def test_to_list_multiple_comments(self): + rule = _make_nlc("// first\n// second\n") + result = rule.to_list() + self.assertIn("first", result) + self.assertIn("second", result) + + def test_token_property(self): + token = NL_OR_COMMENT("\n") + rule = NewLineOrCommentRule([token]) + self.assertIs(rule.token, token) + + +class TestInlineCommentMixIn(TestCase): + def test_insert_optionals_inserts_none_where_no_comment(self): + from hcl2.rules.tokens import NAME + + token = NAME("x") + children = [token, NAME("y")] + mixin = 
ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [1]) + # Should have inserted None at index 1, pushing NAME("y") to index 2 + self.assertIsNone(children[1]) + self.assertEqual(len(children), 3) + + def test_insert_optionals_leaves_comment_in_place(self): + comment = _make_nlc("// comment\n") + from hcl2.rules.tokens import NAME + + children = [NAME("x"), comment] + mixin = ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [1]) + # Should NOT insert None since index 1 is already a NewLineOrCommentRule + self.assertIs(children[1], comment) + self.assertEqual(len(children), 2) + + def test_insert_optionals_handles_index_error(self): + children = [_make_nlc("\n")] + mixin = ConcreteInlineComment.__new__(ConcreteInlineComment) + mixin._insert_optionals(children, [3]) + # Should insert None at index 3 + self.assertEqual(len(children), 2) + self.assertIsNone(children[1]) + + def test_inline_comments_collects_from_children(self): + comment = _make_nlc("// hello\n") + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x"), comment]) + result = rule.inline_comments() + self.assertEqual(result, ["hello"]) + + def test_inline_comments_skips_bare_newlines(self): + newline = _make_nlc("\n") + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x"), newline]) + result = rule.inline_comments() + self.assertEqual(result, []) + + def test_inline_comments_recursive(self): + comment = _make_nlc("// inner\n") + inner = ConcreteInlineComment([comment]) + outer = ConcreteInlineComment([inner]) + result = outer.inline_comments() + self.assertEqual(result, ["inner"]) + + def test_inline_comments_empty(self): + from hcl2.rules.tokens import NAME + + rule = ConcreteInlineComment([NAME("x")]) + result = rule.inline_comments() + self.assertEqual(result, []) diff --git a/test/unit/test_utils.py b/test/unit/test_utils.py new file mode 100644 index 00000000..f5f94e8c --- 
/dev/null +++ b/test/unit/test_utils.py @@ -0,0 +1,148 @@ +from unittest import TestCase + +from hcl2.utils import ( + SerializationOptions, + SerializationContext, + is_dollar_string, + to_dollar_string, + unwrap_dollar_string, + wrap_into_parentheses, +) + + +class TestSerializationOptions(TestCase): + def test_default_values(self): + opts = SerializationOptions() + self.assertTrue(opts.with_comments) + self.assertFalse(opts.with_meta) + self.assertFalse(opts.wrap_objects) + self.assertFalse(opts.wrap_tuples) + self.assertTrue(opts.explicit_blocks) + self.assertTrue(opts.preserve_heredocs) + self.assertFalse(opts.force_operation_parentheses) + + def test_custom_values(self): + opts = SerializationOptions( + with_comments=False, + with_meta=True, + force_operation_parentheses=True, + ) + self.assertFalse(opts.with_comments) + self.assertTrue(opts.with_meta) + self.assertTrue(opts.force_operation_parentheses) + + +class TestSerializationContext(TestCase): + def test_default_values(self): + ctx = SerializationContext() + self.assertFalse(ctx.inside_dollar_string) + self.assertFalse(ctx.inside_parentheses) + + def test_replace_returns_new_instance(self): + ctx = SerializationContext() + new_ctx = ctx.replace(inside_dollar_string=True) + self.assertIsNot(ctx, new_ctx) + self.assertFalse(ctx.inside_dollar_string) + self.assertTrue(new_ctx.inside_dollar_string) + + def test_modify_mutates_and_restores(self): + ctx = SerializationContext() + self.assertFalse(ctx.inside_dollar_string) + + with ctx.modify(inside_dollar_string=True): + self.assertTrue(ctx.inside_dollar_string) + + self.assertFalse(ctx.inside_dollar_string) + + def test_modify_restores_on_exception(self): + ctx = SerializationContext() + + with self.assertRaises(ValueError): + with ctx.modify(inside_dollar_string=True, inside_parentheses=True): + self.assertTrue(ctx.inside_dollar_string) + self.assertTrue(ctx.inside_parentheses) + raise ValueError("test") + + self.assertFalse(ctx.inside_dollar_string) + 
self.assertFalse(ctx.inside_parentheses) + + def test_modify_multiple_fields(self): + ctx = SerializationContext() + with ctx.modify(inside_dollar_string=True, inside_parentheses=True): + self.assertTrue(ctx.inside_dollar_string) + self.assertTrue(ctx.inside_parentheses) + self.assertFalse(ctx.inside_dollar_string) + self.assertFalse(ctx.inside_parentheses) + + def test_copy_yields_independent_copy(self): + ctx = SerializationContext() + with ctx.copy(inside_dollar_string=True) as copied: + self.assertTrue(copied.inside_dollar_string) + self.assertFalse(ctx.inside_dollar_string) + self.assertIsNot(ctx, copied) + + +class TestIsDollarString(TestCase): + def test_valid_dollar_string(self): + self.assertTrue(is_dollar_string("${x}")) + + def test_nested_dollar_string(self): + self.assertTrue(is_dollar_string("${a + b}")) + + def test_plain_string(self): + self.assertFalse(is_dollar_string("foo")) + + def test_incomplete_prefix(self): + self.assertFalse(is_dollar_string("${")) + + def test_non_string_input(self): + self.assertFalse(is_dollar_string(42)) + self.assertFalse(is_dollar_string(None)) + + def test_empty_dollar_string(self): + self.assertTrue(is_dollar_string("${}")) + + def test_dollar_without_brace(self): + self.assertFalse(is_dollar_string("$x}")) + + def test_missing_closing_brace(self): + self.assertFalse(is_dollar_string("${x")) + + +class TestToDollarString(TestCase): + def test_wraps_plain_string(self): + self.assertEqual(to_dollar_string("x"), "${x}") + + def test_idempotent_on_dollar_string(self): + self.assertEqual(to_dollar_string("${x}"), "${x}") + + def test_wraps_empty(self): + self.assertEqual(to_dollar_string(""), "${}") + + def test_wraps_expression(self): + self.assertEqual(to_dollar_string("a + b"), "${a + b}") + + +class TestUnwrapDollarString(TestCase): + def test_strips_wrapping(self): + self.assertEqual(unwrap_dollar_string("${x}"), "x") + + def test_noop_on_plain_string(self): + self.assertEqual(unwrap_dollar_string("foo"), "foo") + + 
def test_strips_complex_expression(self): + self.assertEqual(unwrap_dollar_string("${a + b}"), "a + b") + + +class TestWrapIntoParentheses(TestCase): + def test_plain_string(self): + self.assertEqual(wrap_into_parentheses("x"), "(x)") + + def test_dollar_string(self): + self.assertEqual(wrap_into_parentheses("${x}"), "${(x)}") + + def test_expression_string(self): + self.assertEqual(wrap_into_parentheses("a + b"), "(a + b)") + + def test_dollar_expression(self): + self.assertEqual(wrap_into_parentheses("${a + b}"), "${(a + b)}") From 7662a5e039db786e9400531df0516154e02de666 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:06:33 +0100 Subject: [PATCH 21/42] rewrite api.py, update builder.py, add unit tests for them --- hcl2/__init__.py | 13 +- hcl2/__main__.py | 4 +- hcl2/api.py | 219 ++++++++++++++++++++++++++++------ hcl2/builder.py | 17 ++- hcl2/deserializer.py | 4 +- test/unit/test_api.py | 244 ++++++++++++++++++++++++++++++++++++++ test/unit/test_builder.py | 157 ++++++++++++++++++++++++ 7 files changed, 607 insertions(+), 51 deletions(-) create mode 100644 test/unit/test_api.py create mode 100644 test/unit/test_builder.py diff --git a/hcl2/__init__.py b/hcl2/__init__.py index 2d5dad09..d3a9ea7b 100644 --- a/hcl2/__init__.py +++ b/hcl2/__init__.py @@ -8,10 +8,21 @@ from .api import ( load, loads, + dump, + dumps, parse, parses, + parse_to_tree, + parses_to_tree, + from_dict, + from_json, + reconstruct, transform, - writes, + serialize, ) from .builder import Builder +from .deserializer import DeserializerOptions +from .formatter import FormatterOptions +from .rules.base import StartRule +from .utils import SerializationOptions diff --git a/hcl2/__main__.py b/hcl2/__main__.py index 17a021e1..f1a58938 100644 --- a/hcl2/__main__.py +++ b/hcl2/__main__.py @@ -19,6 +19,7 @@ from lark import UnexpectedCharacters, UnexpectedToken from . 
import load +from .utils import SerializationOptions from .version import __version__ @@ -58,7 +59,8 @@ def main(): else open(args.OUT_PATH, "w", encoding="utf-8") ) print(args.PATH, file=sys.stderr, flush=True) - json.dump(load(in_file, with_meta=args.with_meta), out_file) + options = SerializationOptions(with_meta=True) if args.with_meta else None + json.dump(load(in_file, serialization_options=options), out_file) if args.OUT_PATH is None: out_file.write("\n") out_file.close() diff --git a/hcl2/api.py b/hcl2/api.py index 7c384c53..0238f418 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -1,60 +1,205 @@ -"""The API that will be exposed to users of this package""" -from typing import TextIO +"""The API that will be exposed to users of this package. + +Follows the json module convention: load/loads for reading, dump/dumps for writing. +Also exposes intermediate pipeline stages for advanced usage. +""" + +import json as _json +from typing import TextIO, Optional from lark.tree import Tree -from hcl2.parser import parser + +from hcl2.deserializer import BaseDeserializer, DeserializerOptions +from hcl2.formatter import BaseFormatter, FormatterOptions +from hcl2.parser import parser as _get_parser from hcl2.reconstructor import HCLReconstructor +from hcl2.rules.base import StartRule from hcl2.transformer import RuleTransformer +from hcl2.utils import SerializationOptions + + +# --------------------------------------------------------------------------- +# Primary API: load / loads / dump / dumps +# --------------------------------------------------------------------------- + + +def load( + file: TextIO, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Load a HCL2 file and return a Python dict. + + :param file: File with HCL2 content. + :param serialization_options: Options controlling serialization behavior. 
+ """ + return loads(file.read(), serialization_options=serialization_options) + + +def loads( + text: str, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Load HCL2 from a string and return a Python dict. + :param text: HCL2 text. + :param serialization_options: Options controlling serialization behavior. + """ + tree = parses(text) + return serialize(tree, serialization_options=serialization_options) + + +def dump( + data: dict, + file: TextIO, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, +) -> None: + """Write a Python dict as HCL2 to a file. -def load(file: TextIO, with_meta=False) -> dict: - """Load a HCL2 file. - :param file: File with hcl2 to be loaded as a dict. - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. + :param data: Python dict (as produced by :func:`load`). + :param file: Writable text file. + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. """ - return loads(file.read(), with_meta=with_meta) + file.write(dumps(data, deserializer_options=deserializer_options, formatter_options=formatter_options)) -def loads(text: str, with_meta=False) -> dict: - """Load HCL2 from a string. - :param text: Text with hcl2 to be loaded as a dict. - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. +def dumps( + data: dict, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, +) -> str: + """Convert a Python dict to an HCL2 string. + + :param data: Python dict (as produced by :func:`load`). + :param deserializer_options: Options controlling deserialization behavior. 
+ :param formatter_options: Options controlling formatting behavior. """ - # append new line as a workaround for https://github.com/lark-parser/lark/issues/237 + tree = from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options) + return reconstruct(tree) + + +# --------------------------------------------------------------------------- +# Parsing: HCL text -> LarkElement tree or raw Lark tree +# --------------------------------------------------------------------------- + + +def parse(file: TextIO, *, discard_comments: bool = False) -> StartRule: + """Parse a HCL2 file into a LarkElement tree. + + :param file: File with HCL2 content. + :param discard_comments: If True, discard comments during transformation. + """ + return parses(file.read(), discard_comments=discard_comments) + + +def parses(text: str, *, discard_comments: bool = False) -> StartRule: + """Parse a HCL2 string into a LarkElement tree. + + :param text: HCL2 text. + :param discard_comments: If True, discard comments during transformation. + """ + lark_tree = parses_to_tree(text) + return transform(lark_tree, discard_comments=discard_comments) + + +def parse_to_tree(file: TextIO) -> Tree: + """Parse a HCL2 file into a raw Lark parse tree. + + :param file: File with HCL2 content. + """ + return parses_to_tree(file.read()) + + +def parses_to_tree(text: str) -> Tree: + """Parse a HCL2 string into a raw Lark parse tree. + + :param text: HCL2 text. + """ + # Append newline as workaround for https://github.com/lark-parser/lark/issues/237 # Lark doesn't support EOF token so our grammar can't look for "new line or end of file" - # This means that all blocks must end in a new line even if the file ends - # Append a new line as a temporary fix - tree = parser().parse(text + "\n") - return RuleTransformer().transform(tree) + return _get_parser().parse(text + "\n") -def parse(file: TextIO) -> Tree: - """Load HCL2 syntax tree from a file. 
- :param file: File with hcl2 to be loaded as a dict. +# --------------------------------------------------------------------------- +# Intermediate pipeline stages +# --------------------------------------------------------------------------- + + +def from_dict( + data: dict, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, + format: bool = True, +) -> StartRule: + """Convert a Python dict into a LarkElement tree. + + :param data: Python dict (as produced by :func:`load`). + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. + :param format: If True (default), apply formatting to the tree. + """ + deserializer = BaseDeserializer(deserializer_options) + tree = deserializer.load_python(data) + if format: + formatter = BaseFormatter(formatter_options) + formatter.format_tree(tree) + return tree + + +def from_json( + text: str, + *, + deserializer_options: Optional[DeserializerOptions] = None, + formatter_options: Optional[FormatterOptions] = None, + format: bool = True, +) -> StartRule: + """Convert a JSON string into a LarkElement tree. + + :param text: JSON string. + :param deserializer_options: Options controlling deserialization behavior. + :param formatter_options: Options controlling formatting behavior. + :param format: If True (default), apply formatting to the tree. """ - return parses(file.read()) + data = _json.loads(text) + return from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options, format=format) + +def reconstruct(tree) -> str: + """Convert a LarkElement tree (or raw Lark tree) to an HCL2 string. -def parses(text: str) -> Tree: - """Load HCL2 syntax tree from a string. - :param text: Text with hcl2 to be loaded as a dict. + :param tree: A :class:`StartRule` (LarkElement tree) or :class:`lark.Tree`. 
""" - return parser().parse(text) + reconstructor = HCLReconstructor() + if isinstance(tree, StartRule): + tree = tree.to_lark() + return reconstructor.reconstruct(tree) -def transform(ast: Tree, with_meta=False) -> dict: - """Convert an HCL2 AST to a dictionary. - :param ast: HCL2 syntax tree, output from `parse` or `parses` - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. +def transform(lark_tree: Tree, *, discard_comments: bool = False) -> StartRule: + """Transform a raw Lark parse tree into a LarkElement tree. + + :param lark_tree: Raw Lark tree from :func:`parse_to_tree` or :func:`parse_string_to_tree`. + :param discard_comments: If True, discard comments during transformation. """ - return RuleTransformer().transform(ast) + return RuleTransformer(discard_new_line_or_comments=discard_comments).transform(lark_tree) + +def serialize( + tree: StartRule, + *, + serialization_options: Optional[SerializationOptions] = None, +) -> dict: + """Serialize a LarkElement tree to a Python dict. -def writes(ast: Tree) -> str: - """Convert an HCL2 syntax tree to a string. - :param ast: HCL2 syntax tree, output from `parse` or `parses` + :param tree: A :class:`StartRule` (LarkElement tree). + :param serialization_options: Options controlling serialization behavior. """ - return HCLReconstructor().reconstruct(ast) + if serialization_options is not None: + return tree.serialize(options=serialization_options) + return tree.serialize() diff --git a/hcl2/builder.py b/hcl2/builder.py index b5b149da..5ef0c416 100644 --- a/hcl2/builder.py +++ b/hcl2/builder.py @@ -3,18 +3,16 @@ from collections import defaultdict -from hcl2.const import START_LINE_KEY, END_LINE_KEY +from hcl2.const import IS_BLOCK class Builder: """ The `hcl2.Builder` class produces a dictionary that should be identical to the - output of `hcl2.load(example_file, with_meta=True)`. The `with_meta` keyword - argument is important here. 
HCL "blocks" in the Python dictionary are - identified by the presence of `__start_line__` and `__end_line__` metadata - within them. The `Builder` class handles adding that metadata. If that metadata - is missing, the `hcl2.reconstructor.HCLReverseTransformer` class fails to - identify what is a block and what is just an attribute with an object value. + output of `hcl2.load(example_file)`. HCL "blocks" in the Python dictionary are + identified by the presence of `__is_block__: True` markers within them. + The `Builder` class handles adding that marker. If that marker is missing, + the deserializer fails to distinguish blocks from regular object attributes. """ def __init__(self, attributes: Optional[dict] = None): @@ -49,8 +47,7 @@ def build(self): body.update( { - START_LINE_KEY: -1, - END_LINE_KEY: -1, + IS_BLOCK: True, **self.attributes, } ) @@ -79,7 +76,7 @@ def _add_nested_blocks( """Add nested blocks defined within another `Builder` instance to the `block` dictionary""" nested_block = nested_blocks_builder.build() for key, value in nested_block.items(): - if key not in (START_LINE_KEY, END_LINE_KEY): + if key != IS_BLOCK: if key not in block.keys(): block[key] = [] block[key].extend(value) diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index d6b4d4c2..0ca91b48 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -6,7 +6,7 @@ from regex import regex -from hcl2 import parses +from hcl2.parser import parser as _get_parser from hcl2.const import IS_BLOCK from hcl2.rules.abstract import LarkElement, LarkRule from hcl2.rules.base import ( @@ -217,7 +217,7 @@ def _deserialize_expression(self, value: str) -> ExprTermRule: # create HCL2 snippet value = f"temp = {value}" # parse the above - parsed_tree = parses(value) + parsed_tree = _get_parser().parse(value) # transform parsed tree into LarkElement tree rules_tree = self._transformer.transform(parsed_tree) # extract expression from the tree diff --git a/test/unit/test_api.py 
b/test/unit/test_api.py new file mode 100644 index 00000000..a87d9e32 --- /dev/null +++ b/test/unit/test_api.py @@ -0,0 +1,244 @@ +from io import StringIO +from unittest import TestCase + +from hcl2.api import ( + load, + loads, + dump, + dumps, + parse, + parses, + parse_to_tree, + parses_to_tree, + from_dict, + from_json, + reconstruct, + transform, + serialize, +) +from hcl2.rules.base import StartRule +from hcl2.utils import SerializationOptions +from hcl2.deserializer import DeserializerOptions +from hcl2.formatter import FormatterOptions +from lark.tree import Tree + + +SIMPLE_HCL = 'x = 5\n' +SIMPLE_DICT = {"x": 5} + +BLOCK_HCL = 'resource "aws_instance" "example" {\n ami = "abc-123"\n}\n' + + +class TestLoads(TestCase): + + def test_simple_attribute(self): + result = loads(SIMPLE_HCL) + self.assertEqual(result["x"], 5) + + def test_returns_dict(self): + result = loads(SIMPLE_HCL) + self.assertIsInstance(result, dict) + + def test_with_serialization_options(self): + result = loads(SIMPLE_HCL, serialization_options=SerializationOptions(with_comments=False)) + self.assertIsInstance(result, dict) + self.assertEqual(result["x"], 5) + + def test_with_meta_option(self): + result = loads(SIMPLE_HCL, serialization_options=SerializationOptions(with_meta=True)) + self.assertIn("x", result) + + def test_block_parsing(self): + result = loads(BLOCK_HCL) + self.assertIn("resource", result) + + +class TestLoad(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = load(f) + self.assertEqual(result["x"], 5) + + def test_with_serialization_options(self): + f = StringIO(SIMPLE_HCL) + result = load(f, serialization_options=SerializationOptions(with_comments=False)) + self.assertEqual(result["x"], 5) + + +class TestDumps(TestCase): + + def test_simple_attribute(self): + result = dumps(SIMPLE_DICT) + self.assertIsInstance(result, str) + self.assertIn("x", result) + self.assertIn("5", result) + + def test_dumps_contains_key_and_value(self): + result = 
dumps(SIMPLE_DICT) + self.assertIn("x", result) + self.assertIn("5", result) + + def test_with_deserializer_options(self): + result = dumps(SIMPLE_DICT, deserializer_options=DeserializerOptions()) + self.assertIsInstance(result, str) + + def test_with_formatter_options(self): + result = dumps(SIMPLE_DICT, formatter_options=FormatterOptions()) + self.assertIsInstance(result, str) + + +class TestDump(TestCase): + + def test_writes_to_file(self): + f = StringIO() + dump(SIMPLE_DICT, f) + output = f.getvalue() + self.assertIn("x", output) + self.assertIn("5", output) + + +class TestParsesToTree(TestCase): + + def test_returns_lark_tree(self): + result = parses_to_tree(SIMPLE_HCL) + self.assertIsInstance(result, Tree) + + def test_tree_has_start_rule(self): + result = parses_to_tree(SIMPLE_HCL) + self.assertEqual(result.data, "start") + + +class TestParseToTree(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = parse_to_tree(f) + self.assertIsInstance(result, Tree) + + +class TestParses(TestCase): + + def test_returns_start_rule(self): + result = parses(SIMPLE_HCL) + self.assertIsInstance(result, StartRule) + + def test_discard_comments_false(self): + hcl = '# comment\nx = 5\n' + result = parses(hcl, discard_comments=False) + serialized = serialize(result) + self.assertIn("__comments__", serialized) + + def test_discard_comments_true(self): + hcl = '# comment\nx = 5\n' + result = parses(hcl, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestParse(TestCase): + + def test_from_file(self): + f = StringIO(SIMPLE_HCL) + result = parse(f) + self.assertIsInstance(result, StartRule) + + def test_discard_comments(self): + f = StringIO('# comment\nx = 5\n') + result = parse(f, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestTransform(TestCase): + + def test_transforms_lark_tree(self): + lark_tree = 
parses_to_tree(SIMPLE_HCL) + result = transform(lark_tree) + self.assertIsInstance(result, StartRule) + + def test_discard_comments(self): + lark_tree = parses_to_tree('# comment\nx = 5\n') + result = transform(lark_tree, discard_comments=True) + serialized = serialize(result) + self.assertNotIn("__comments__", serialized) + + +class TestSerialize(TestCase): + + def test_returns_dict(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree) + self.assertIsInstance(result, dict) + self.assertEqual(result["x"], 5) + + def test_with_options(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree, serialization_options=SerializationOptions(with_comments=False)) + self.assertIsInstance(result, dict) + + def test_none_options_uses_defaults(self): + tree = parses(SIMPLE_HCL) + result = serialize(tree, serialization_options=None) + self.assertEqual(result["x"], 5) + + +class TestFromDict(TestCase): + + def test_returns_start_rule(self): + result = from_dict(SIMPLE_DICT) + self.assertIsInstance(result, StartRule) + + def test_roundtrip(self): + tree = from_dict(SIMPLE_DICT) + result = serialize(tree) + self.assertEqual(result["x"], 5) + + def test_without_formatting(self): + result = from_dict(SIMPLE_DICT, format=False) + self.assertIsInstance(result, StartRule) + + def test_with_deserializer_options(self): + result = from_dict(SIMPLE_DICT, deserializer_options=DeserializerOptions()) + self.assertIsInstance(result, StartRule) + + def test_with_formatter_options(self): + result = from_dict(SIMPLE_DICT, formatter_options=FormatterOptions()) + self.assertIsInstance(result, StartRule) + + +class TestFromJson(TestCase): + + def test_returns_start_rule(self): + result = from_json('{"x": 5}') + self.assertIsInstance(result, StartRule) + + def test_roundtrip(self): + tree = from_json('{"x": 5}') + result = serialize(tree) + self.assertEqual(result["x"], 5) + + def test_without_formatting(self): + result = from_json('{"x": 5}', format=False) + self.assertIsInstance(result, 
StartRule) + + +class TestReconstruct(TestCase): + + def test_from_start_rule(self): + tree = parses(SIMPLE_HCL) + result = reconstruct(tree) + self.assertIsInstance(result, str) + self.assertIn("x", result) + + def test_from_lark_tree(self): + lark_tree = parses_to_tree(SIMPLE_HCL) + result = reconstruct(lark_tree) + self.assertIsInstance(result, str) + self.assertIn("x", result) + + def test_roundtrip(self): + tree = parses(SIMPLE_HCL) + hcl_text = reconstruct(tree) + reparsed = loads(hcl_text) + self.assertEqual(reparsed["x"], 5) diff --git a/test/unit/test_builder.py b/test/unit/test_builder.py new file mode 100644 index 00000000..5d411c64 --- /dev/null +++ b/test/unit/test_builder.py @@ -0,0 +1,157 @@ +from unittest import TestCase + +from hcl2.builder import Builder +from hcl2.const import IS_BLOCK + + +class TestBuilderAttributes(TestCase): + + def test_empty_builder(self): + b = Builder() + result = b.build() + self.assertIn(IS_BLOCK, result) + self.assertTrue(result[IS_BLOCK]) + + def test_with_attributes(self): + b = Builder({"key": "value", "count": 3}) + result = b.build() + self.assertEqual(result["key"], "value") + self.assertEqual(result["count"], 3) + + def test_is_block_marker_present(self): + b = Builder({"x": 1}) + result = b.build() + self.assertTrue(result[IS_BLOCK]) + + +class TestBuilderBlock(TestCase): + + def test_simple_block(self): + b = Builder() + b.block("resource") + result = b.build() + self.assertIn("resource", result) + self.assertEqual(len(result["resource"]), 1) + + def test_block_with_labels(self): + b = Builder() + b.block("resource", labels=["aws_instance", "example"]) + result = b.build() + block_entry = result["resource"][0] + self.assertIn("aws_instance", block_entry) + inner = block_entry["aws_instance"] + self.assertIn("example", inner) + + def test_block_with_attributes(self): + b = Builder() + b.block("resource", labels=["type"], ami="abc-123") + result = b.build() + block = result["resource"][0]["type"] + 
self.assertEqual(block["ami"], "abc-123") + + def test_multiple_blocks_same_type(self): + b = Builder() + b.block("resource", labels=["type_a"]) + b.block("resource", labels=["type_b"]) + result = b.build() + self.assertEqual(len(result["resource"]), 2) + + def test_multiple_block_types(self): + b = Builder() + b.block("resource") + b.block("data") + result = b.build() + self.assertIn("resource", result) + self.assertIn("data", result) + + def test_block_returns_builder(self): + b = Builder() + child = b.block("resource") + self.assertIsInstance(child, Builder) + + def test_block_child_attributes(self): + b = Builder() + child = b.block("resource", labels=["type"]) + child.attributes["nested_key"] = "nested_val" + # Rebuild to pick up the changes + result = b.build() + block = result["resource"][0]["type"] + self.assertEqual(block["nested_key"], "nested_val") + + def test_self_reference_raises(self): + b = Builder() + with self.assertRaises(ValueError): + b.block("resource", __nested_builder__=b) + + +class TestBuilderNestedBlocks(TestCase): + + def test_nested_builder(self): + b = Builder() + inner = Builder() + inner.block("provisioner", labels=["local-exec"], command="echo hello") + b.block("resource", labels=["type"], __nested_builder__=inner) + result = b.build() + block = result["resource"][0]["type"] + self.assertIn("provisioner", block) + + def test_nested_blocks_merged(self): + b = Builder() + inner = Builder() + inner.block("sub_block", x=1) + inner.block("sub_block", x=2) + b.block("resource", __nested_builder__=inner) + result = b.build() + block = result["resource"][0] + self.assertEqual(len(block["sub_block"]), 2) + + +class TestBuilderBlockMarker(TestCase): + + def test_block_marker_is_is_block(self): + """Verify IS_BLOCK marker is used (not __start_line__/__end_line__).""" + b = Builder({"x": 1}) + result = b.build() + self.assertIn(IS_BLOCK, result) + self.assertTrue(result[IS_BLOCK]) + self.assertNotIn("__start_line__", result) + 
self.assertNotIn("__end_line__", result) + + def test_nested_blocks_skip_is_block_key(self): + """_add_nested_blocks should skip IS_BLOCK when merging.""" + b = Builder() + inner = Builder() + inner.block("sub", val=1) + b.block("parent", __nested_builder__=inner) + result = b.build() + parent_block = result["parent"][0] + # sub blocks should be present, but IS_BLOCK from inner should not leak as a list + self.assertIn("sub", parent_block) + # IS_BLOCK should be a bool marker, not a list + self.assertTrue(parent_block[IS_BLOCK]) + + +class TestBuilderIntegration(TestCase): + + def test_full_document(self): + doc = Builder() + doc.block( + "resource", + labels=["aws_instance", "web"], + ami="ami-12345", + instance_type="t2.micro", + ) + doc.block( + "resource", + labels=["aws_s3_bucket", "data"], + bucket="my-bucket", + ) + result = doc.build() + self.assertEqual(len(result["resource"]), 2) + + web = result["resource"][0]["aws_instance"]["web"] + self.assertEqual(web["ami"], "ami-12345") + self.assertEqual(web["instance_type"], "t2.micro") + + data = result["resource"][1]["aws_s3_bucket"]["data"] + self.assertEqual(data["bucket"], "my-bucket") From c05273d26e1c751266f1c924a9a96f12ac5fcdc9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:06:58 +0100 Subject: [PATCH 22/42] reorganize "round-trip" tests into integration tests --- test/{round_trip => integration}/__init__.py | 0 .../hcl2_original/operators.tf} | 0 .../hcl2_original/smoke.tf | 0 .../hcl2_reconstructed/operators.tf} | 0 .../hcl2_reconstructed/smoke.tf | 0 .../json_reserialized/operators.json} | 0 .../json_reserialized/smoke.json | 0 .../json_serialized/operators.json} | 0 .../json_serialized/smoke.json | 0 .../specialized/builder_basic.json | 63 +++++++++++++++ test/integration/specialized/builder_basic.tf | 38 +++++++++ .../specialized/builder_basic_reparsed.json | 64 +++++++++++++++ .../builder_basic_reserialized.json | 62 +++++++++++++++ .../specialized}/operator_precedence.json | 0 
.../specialized/operator_precedence.tf | 15 ++++ .../test_round_trip.py | 48 +++--------- test/integration/test_specialized.py | 77 +++++++++++++++++++ 17 files changed, 331 insertions(+), 36 deletions(-) rename test/{round_trip => integration}/__init__.py (100%) rename test/{round_trip/hcl2_original/operator_precedence.tf => integration/hcl2_original/operators.tf} (100%) rename test/{round_trip => integration}/hcl2_original/smoke.tf (100%) rename test/{round_trip/hcl2_reconstructed/operator_precedence.tf => integration/hcl2_reconstructed/operators.tf} (100%) rename test/{round_trip => integration}/hcl2_reconstructed/smoke.tf (100%) rename test/{round_trip/json_reserialized/operator_precedence.json => integration/json_reserialized/operators.json} (100%) rename test/{round_trip => integration}/json_reserialized/smoke.json (100%) rename test/{round_trip/json_serialized/operator_precedence.json => integration/json_serialized/operators.json} (100%) rename test/{round_trip => integration}/json_serialized/smoke.json (100%) create mode 100644 test/integration/specialized/builder_basic.json create mode 100644 test/integration/specialized/builder_basic.tf create mode 100644 test/integration/specialized/builder_basic_reparsed.json create mode 100644 test/integration/specialized/builder_basic_reserialized.json rename test/{round_trip/special => integration/specialized}/operator_precedence.json (100%) create mode 100644 test/integration/specialized/operator_precedence.tf rename test/{round_trip => integration}/test_round_trip.py (78%) create mode 100644 test/integration/test_specialized.py diff --git a/test/round_trip/__init__.py b/test/integration/__init__.py similarity index 100% rename from test/round_trip/__init__.py rename to test/integration/__init__.py diff --git a/test/round_trip/hcl2_original/operator_precedence.tf b/test/integration/hcl2_original/operators.tf similarity index 100% rename from test/round_trip/hcl2_original/operator_precedence.tf rename to 
test/integration/hcl2_original/operators.tf diff --git a/test/round_trip/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf similarity index 100% rename from test/round_trip/hcl2_original/smoke.tf rename to test/integration/hcl2_original/smoke.tf diff --git a/test/round_trip/hcl2_reconstructed/operator_precedence.tf b/test/integration/hcl2_reconstructed/operators.tf similarity index 100% rename from test/round_trip/hcl2_reconstructed/operator_precedence.tf rename to test/integration/hcl2_reconstructed/operators.tf diff --git a/test/round_trip/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf similarity index 100% rename from test/round_trip/hcl2_reconstructed/smoke.tf rename to test/integration/hcl2_reconstructed/smoke.tf diff --git a/test/round_trip/json_reserialized/operator_precedence.json b/test/integration/json_reserialized/operators.json similarity index 100% rename from test/round_trip/json_reserialized/operator_precedence.json rename to test/integration/json_reserialized/operators.json diff --git a/test/round_trip/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json similarity index 100% rename from test/round_trip/json_reserialized/smoke.json rename to test/integration/json_reserialized/smoke.json diff --git a/test/round_trip/json_serialized/operator_precedence.json b/test/integration/json_serialized/operators.json similarity index 100% rename from test/round_trip/json_serialized/operator_precedence.json rename to test/integration/json_serialized/operators.json diff --git a/test/round_trip/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json similarity index 100% rename from test/round_trip/json_serialized/smoke.json rename to test/integration/json_serialized/smoke.json diff --git a/test/integration/specialized/builder_basic.json b/test/integration/specialized/builder_basic.json new file mode 100644 index 00000000..da62720b --- /dev/null +++ 
b/test/integration/specialized/builder_basic.json @@ -0,0 +1,63 @@ +{ + "__is_block__": true, + "resource": [ + { + "aws_instance": { + "web": { + "__is_block__": true, + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2 + } + } + }, + { + "aws_s3_bucket": { + "data": { + "__is_block__": true, + "bucket": "\"my-bucket\"", + "acl": "\"private\"" + } + } + }, + { + "aws_instance": { + "nested": { + "__is_block__": true, + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "__is_block__": true, + "command": "\"echo hello\"" + } + }, + { + "remote-exec": { + "__is_block__": true, + "inline": "[\"puppet apply\"]" + } + } + ] + } + } + } + ], + "variable": [ + { + "instance_type": { + "__is_block__": true, + "default": "\"t2.micro\"", + "description": "\"The instance type\"" + } + } + ], + "locals": [ + { + "__is_block__": true, + "port": 8080, + "enabled": true, + "name": "\"my-app\"" + } + ] +} diff --git a/test/integration/specialized/builder_basic.tf b/test/integration/specialized/builder_basic.tf new file mode 100644 index 00000000..b7ee2131 --- /dev/null +++ b/test/integration/specialized/builder_basic.tf @@ -0,0 +1,38 @@ +resource aws_instance web { + ami = "ami-12345" + instance_type = "t2.micro" + count = 2 +} + + +resource aws_s3_bucket data { + bucket = "my-bucket" + acl = "private" +} + + +resource aws_instance nested { + ami = "ami-99999" + + provisioner local-exec { + command = "echo hello" + } + + + provisioner remote-exec { + inline = ["puppet apply"] + } +} + + +variable instance_type { + default = "t2.micro" + description = "The instance type" +} + + +locals { + port = 8080 + enabled = true + name = "my-app" +} diff --git a/test/integration/specialized/builder_basic_reparsed.json b/test/integration/specialized/builder_basic_reparsed.json new file mode 100644 index 00000000..32e4954d --- /dev/null +++ b/test/integration/specialized/builder_basic_reparsed.json @@ -0,0 +1,64 @@ +{ + "resource": [ + { + "aws_instance": 
{ + "web": { + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2, + "__is_block__": true + } + } + }, + { + "aws_s3_bucket": { + "data": { + "bucket": "\"my-bucket\"", + "acl": "\"private\"", + "__is_block__": true + } + } + }, + { + "aws_instance": { + "nested": { + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "command": "\"echo hello\"", + "__is_block__": true + } + }, + { + "remote-exec": { + "inline": [ + "\"puppet apply\"" + ], + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + } + ], + "variable": [ + { + "instance_type": { + "default": "\"t2.micro\"", + "description": "\"The instance type\"", + "__is_block__": true + } + } + ], + "locals": [ + { + "port": 8080, + "enabled": "true", + "name": "\"my-app\"", + "__is_block__": true + } + ] +} diff --git a/test/integration/specialized/builder_basic_reserialized.json b/test/integration/specialized/builder_basic_reserialized.json new file mode 100644 index 00000000..364ef0c3 --- /dev/null +++ b/test/integration/specialized/builder_basic_reserialized.json @@ -0,0 +1,62 @@ +{ + "resource": [ + { + "aws_instance": { + "web": { + "ami": "\"ami-12345\"", + "instance_type": "\"t2.micro\"", + "count": 2, + "__is_block__": true + } + } + }, + { + "aws_s3_bucket": { + "data": { + "bucket": "\"my-bucket\"", + "acl": "\"private\"", + "__is_block__": true + } + } + }, + { + "aws_instance": { + "nested": { + "ami": "\"ami-99999\"", + "provisioner": [ + { + "local-exec": { + "command": "\"echo hello\"", + "__is_block__": true + } + }, + { + "remote-exec": { + "inline": "[\"puppet apply\"]", + "__is_block__": true + } + } + ], + "__is_block__": true + } + } + } + ], + "variable": [ + { + "instance_type": { + "default": "\"t2.micro\"", + "description": "\"The instance type\"", + "__is_block__": true + } + } + ], + "locals": [ + { + "port": 8080, + "enabled": "true", + "name": "\"my-app\"", + "__is_block__": true + } + ] +} diff --git 
a/test/round_trip/special/operator_precedence.json b/test/integration/specialized/operator_precedence.json similarity index 100% rename from test/round_trip/special/operator_precedence.json rename to test/integration/specialized/operator_precedence.json diff --git a/test/integration/specialized/operator_precedence.tf b/test/integration/specialized/operator_precedence.tf new file mode 100644 index 00000000..f8351161 --- /dev/null +++ b/test/integration/specialized/operator_precedence.tf @@ -0,0 +1,15 @@ +locals { + addition_1 = ((a + b) + c) + addition_2 = a + b + addition_3 = (a + b) + eq_before_and = var.env == "prod" && var.debug + and_before_ternary = true && true ? 1 : 0 + mixed_arith_cmp = var.a + var.b * var.c > 10 + full_chain = a + b == c && d || e + left_assoc_sub = a - b - c + left_assoc_mul_div = (a * b) / c + nested_ternary = (a ? b : c) ? d : e + unary_precedence = !a && b + neg_precedence = (-a) + b + neg_parentheses = -(a + b) +} diff --git a/test/round_trip/test_round_trip.py b/test/integration/test_round_trip.py similarity index 78% rename from test/round_trip/test_round_trip.py rename to test/integration/test_round_trip.py index 93fcd111..3d2bbbb0 100644 --- a/test/round_trip/test_round_trip.py +++ b/test/integration/test_round_trip.py @@ -1,7 +1,7 @@ """Round-trip tests for the HCL2 → JSON → HCL2 pipeline. -Every test starts from the source HCL files in test/round_trip/hcl2/ and -runs the pipeline forward from there, comparing actuals against expected +Every test starts from the source HCL files in test/integration/hcl2_original/ +and runs the pipeline forward from there, comparing actuals against expected outputs at each stage: 1. 
HCL → JSON serialization (parse + transform + serialize) @@ -16,23 +16,20 @@ from typing import List from unittest import TestCase -from hcl2 import parses +from hcl2.api import parses_to_tree from hcl2.deserializer import BaseDeserializer from hcl2.formatter import BaseFormatter from hcl2.reconstructor import HCLReconstructor from hcl2.transformer import RuleTransformer -from hcl2.utils import SerializationOptions -ROUND_TRIP_DIR = Path(__file__).absolute().parent -HCL2_ORIGINAL_DIR = ROUND_TRIP_DIR / "hcl2_original" - -SPECIAL_DIR = ROUND_TRIP_DIR / "special" +INTEGRATION_DIR = Path(__file__).absolute().parent +HCL2_ORIGINAL_DIR = INTEGRATION_DIR / "hcl2_original" _STEP_DIRS = { "hcl2_original": HCL2_ORIGINAL_DIR, - "hcl2_reconstructed": ROUND_TRIP_DIR / "hcl2_reconstructed", - "json_serialized": ROUND_TRIP_DIR / "json_serialized", - "json_reserialized": ROUND_TRIP_DIR / "json_reserialized", + "hcl2_reconstructed": INTEGRATION_DIR / "hcl2_reconstructed", + "json_serialized": INTEGRATION_DIR / "json_serialized", + "json_reserialized": INTEGRATION_DIR / "json_reserialized", } _STEP_SUFFIXES = { @@ -53,7 +50,7 @@ class SuiteStep(Enum): def _get_suites() -> List[str]: """ Get a list of the test suites. - Names of a test suite is a name of file in `test/round_trip/hcl2_original/` without the .tf suffix. + Names of a test suite is a name of file in `test/integration/hcl2_original/` without the .tf suffix. Override SUITES to run a specific subset, e.g. SUITES = ["config"] """ @@ -63,7 +60,7 @@ def _get_suites() -> List[str]: # set this to arbitrary list of test suites to run, -# e.g. `SUITES = ["smoke"]` to run the tests only for `test/round_trip/hcl2_original/smoke.tf` +# e.g. 
`SUITES = ["smoke"]` to run the tests only for `test/integration/hcl2_original/smoke.tf` SUITES: List[str] = [] @@ -72,9 +69,9 @@ def _get_suite_file(suite_name: str, step: SuiteStep) -> Path: return _STEP_DIRS[step.value] / (suite_name + _STEP_SUFFIXES[step.value]) -def _parse_and_serialize(hcl_text: str, options: SerializationOptions = None) -> dict: +def _parse_and_serialize(hcl_text: str, options=None) -> dict: """Parse HCL text and serialize to a Python dict.""" - parsed_tree = parses(hcl_text) + parsed_tree = parses_to_tree(hcl_text) rules = RuleTransformer().transform(parsed_tree) if options: return rules.serialize(options=options) @@ -192,24 +189,3 @@ def test_full_round_trip(self): f"Full round-trip mismatch for {suite}: " f"HCL → JSON → HCL → JSON did not produce identical JSON", ) - - -class TestOperatorPrecedence(TestCase): - """Test that parsed expressions correctly represent operator precedence. - - Serializes with force_operation_parentheses=True so that implicit - precedence becomes explicit parentheses in the output. - See: https://github.com/amplify-education/python-hcl2/issues/248 - """ - - maxDiff = None - _OPTIONS = SerializationOptions(force_operation_parentheses=True) - - def test_operator_precedence(self): - hcl_path = _get_suite_file("operator_precedence", SuiteStep.ORIGINAL) - json_path = SPECIAL_DIR / "operator_precedence.json" - - actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) - expected = json.loads(json_path.read_text()) - - self.assertEqual(actual, expected) diff --git a/test/integration/test_specialized.py b/test/integration/test_specialized.py new file mode 100644 index 00000000..d1b817e2 --- /dev/null +++ b/test/integration/test_specialized.py @@ -0,0 +1,77 @@ +"""Specialized integration tests for specific features and scenarios. 
+ +Unlike the suite-based round-trip tests, these target individual features +(operator precedence, Builder round-trip) with dedicated golden files +in test/integration/specialized/. +""" + +import json +from pathlib import Path +from unittest import TestCase + +from hcl2.utils import SerializationOptions + +from test.integration.test_round_trip import ( + _parse_and_serialize, + _deserialize_and_reserialize, + _deserialize_and_reconstruct, +) + +SPECIAL_DIR = Path(__file__).absolute().parent / "specialized" + + +class TestOperatorPrecedence(TestCase): + """Test that parsed expressions correctly represent operator precedence. + + Serializes with force_operation_parentheses=True so that implicit + precedence becomes explicit parentheses in the output. + See: https://github.com/amplify-education/python-hcl2/issues/248 + """ + + maxDiff = None + _OPTIONS = SerializationOptions(force_operation_parentheses=True) + + def test_operator_precedence(self): + hcl_path = SPECIAL_DIR / "operator_precedence.tf" + json_path = SPECIAL_DIR / "operator_precedence.json" + + actual = _parse_and_serialize(hcl_path.read_text(), options=self._OPTIONS) + expected = json.loads(json_path.read_text()) + + self.assertEqual(actual, expected) + + +class TestBuilderRoundTrip(TestCase): + """Test that dicts produced by Builder can be deserialized, reconstructed to + valid HCL, and reparsed back to equivalent dicts.
+ + Pipeline: Builder.build() → from_dict → reconstruct → HCL text + HCL text → parse → serialize → dict (compare with expected) + """ + + maxDiff = None + + def _load_special(self, name, suffix): + return (SPECIAL_DIR / f"{name}{suffix}").read_text() + + def test_builder_reconstruction(self): + """Builder dict → deserialize → reconstruct → compare with expected HCL.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + actual_hcl = _deserialize_and_reconstruct(builder_dict) + expected_hcl = self._load_special("builder_basic", ".tf") + self.assertMultiLineEqual(actual_hcl, expected_hcl) + + def test_builder_full_round_trip(self): + """Builder dict → reconstruct → reparse → compare with expected JSON.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + reconstructed_hcl = _deserialize_and_reconstruct(builder_dict) + actual = _parse_and_serialize(reconstructed_hcl) + expected = json.loads(self._load_special("builder_basic_reparsed", ".json")) + self.assertEqual(actual, expected) + + def test_builder_reserialization(self): + """Builder dict → deserialize → reserialize → compare with expected dict.""" + builder_dict = json.loads(self._load_special("builder_basic", ".json")) + reserialized = _deserialize_and_reserialize(builder_dict) + expected = json.loads(self._load_special("builder_basic_reserialized", ".json")) + self.assertEqual(reserialized, expected) From cf33fb3a05cd67c09607904f2f5ba798e6c1e2e2 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 22 Feb 2026 20:17:55 +0100 Subject: [PATCH 23/42] increase coverage failure threshold --- .coveragerc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.coveragerc b/.coveragerc index 30e6dc8c..3907df05 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,8 +5,8 @@ omit = hcl2/lark_parser.py hcl2/version.py hcl2/__init__.py - hcl2/rules/__init__.py + hcl2/rules/__init__.py [report] show_missing = true -fail_under = 80 +fail_under = 90 From 
020d141cbb7619c32ebd46b7b30d8ed26c813aed Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 13:51:12 +0100 Subject: [PATCH 24/42] migrate some of existing round-trip tests to the new style, fix some related bugs --- hcl2/deserializer.py | 15 ++++++--- hcl2/hcl2.lark | 3 +- hcl2/rules/containers.py | 6 +++- hcl2/transformer.py | 3 ++ .../hcl2_original/floats.tf} | 0 .../hcl2_original}/nulls.tf | 0 test/integration/hcl2_original/object_keys.tf | 8 +++++ .../resource_keyword_attribute.tf | 8 +++++ test/integration/hcl2_original/smoke.tf | 11 ------- .../hcl2_original}/string_interpolations.tf | 6 ++-- .../hcl2_original}/unicode_strings.tf | 0 test/integration/hcl2_reconstructed/floats.tf | 26 ++++++++++++++++ test/integration/hcl2_reconstructed/nulls.tf | 11 +++++++ .../hcl2_reconstructed/object_keys.tf | 8 +++++ .../resource_keyword_attribute.tf | 8 +++++ test/integration/hcl2_reconstructed/smoke.tf | 11 ------- .../string_interpolations.tf | 9 ++++++ .../hcl2_reconstructed/unicode_strings.tf | 21 +++++++++++++ .../integration/json_reserialized/floats.json | 31 +++++++++++++++++++ test/integration/json_reserialized/nulls.json | 13 ++++++++ .../json_reserialized/object_keys.json | 10 ++++++ .../resource_keyword_attribute.json | 17 ++++++++++ test/integration/json_reserialized/smoke.json | 14 --------- .../string_interpolations.json | 18 +++++++++++ .../json_reserialized/unicode_strings.json | 21 +++++++++++++ test/integration/json_serialized/floats.json | 31 +++++++++++++++++++ test/integration/json_serialized/nulls.json | 13 ++++++++ .../json_serialized/object_keys.json | 10 ++++++ .../resource_keyword_attribute.json | 17 ++++++++++ test/integration/json_serialized/smoke.json | 14 --------- .../string_interpolations.json | 18 +++++++++++ .../json_serialized/unicode_strings.json | 21 +++++++++++++ test/unit/rules/test_containers.py | 4 +-- 33 files changed, 344 insertions(+), 62 deletions(-) rename test/{helpers/terraform-config/test_floats.tf => 
integration/hcl2_original/floats.tf} (100%) rename test/{helpers/terraform-config => integration/hcl2_original}/nulls.tf (100%) create mode 100644 test/integration/hcl2_original/object_keys.tf create mode 100644 test/integration/hcl2_original/resource_keyword_attribute.tf rename test/{helpers/terraform-config => integration/hcl2_original}/string_interpolations.tf (68%) rename test/{helpers/terraform-config => integration/hcl2_original}/unicode_strings.tf (100%) create mode 100644 test/integration/hcl2_reconstructed/floats.tf create mode 100644 test/integration/hcl2_reconstructed/nulls.tf create mode 100644 test/integration/hcl2_reconstructed/object_keys.tf create mode 100644 test/integration/hcl2_reconstructed/resource_keyword_attribute.tf create mode 100644 test/integration/hcl2_reconstructed/string_interpolations.tf create mode 100644 test/integration/hcl2_reconstructed/unicode_strings.tf create mode 100644 test/integration/json_reserialized/floats.json create mode 100644 test/integration/json_reserialized/nulls.json create mode 100644 test/integration/json_reserialized/object_keys.json create mode 100644 test/integration/json_reserialized/resource_keyword_attribute.json create mode 100644 test/integration/json_reserialized/string_interpolations.json create mode 100644 test/integration/json_reserialized/unicode_strings.json create mode 100644 test/integration/json_serialized/floats.json create mode 100644 test/integration/json_serialized/nulls.json create mode 100644 test/integration/json_serialized/object_keys.json create mode 100644 test/integration/json_serialized/resource_keyword_attribute.json create mode 100644 test/integration/json_serialized/string_interpolations.json create mode 100644 test/integration/json_serialized/unicode_strings.json diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 0ca91b48..a1f9733e 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -88,7 +88,12 @@ def _transformer(self) -> RuleTransformer: return 
RuleTransformer() def load_python(self, value: Any) -> LarkElement: - result = StartRule([self._deserialize(value)]) + if isinstance(value, dict): + # Top-level dict is always a body (attributes + blocks), not an object + children = self._deserialize_block_elements(value) + result = StartRule([BodyRule(children)]) + else: + result = StartRule([self._deserialize(value)]) return result def loads(self, value: str) -> LarkElement: @@ -286,7 +291,7 @@ def _deserialize_object(self, value: dict) -> ObjectRule: return ObjectRule([LBRACE(), *children, RBRACE()]) - def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: + def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: if self._is_expression(key): key = ObjectElemKeyExpressionRule( [ @@ -295,7 +300,7 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: if child is not None ] ) - elif "." in key: + elif isinstance(key, str) and "." in key: parts = key.split(".") children = [] for part in parts: @@ -313,8 +318,8 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: return ObjectElemRule(result) - def _is_expression(self, value: str) -> bool: - return value.startswith("${") and value.endswith("}") + def _is_expression(self, value: Any) -> bool: + return isinstance(value, str) and value.startswith("${") and value.endswith("}") def _is_block(self, value: Any) -> bool: """Simple check: if it's a list containing dicts with IS_BLOCK markers""" diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index 63154efb..4a9f1ec6 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -81,7 +81,8 @@ start : body // Body and basic constructs body : (new_line_or_comment? (attribute | block))* new_line_or_comment? -attribute : identifier EQ expression +attribute : _attribute_name EQ expression +_attribute_name : identifier | keyword block : identifier (identifier | string)* new_line_or_comment? 
LBRACE body RBRACE // Whitespace and comments diff --git a/hcl2/rules/containers.py b/hcl2/rules/containers.py index 4d7310c8..3f590c5c 100644 --- a/hcl2/rules/containers.py +++ b/hcl2/rules/containers.py @@ -96,7 +96,11 @@ def value(self) -> key_T: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return self.value.serialize(options, context) + result = self.value.serialize(options, context) + # Object keys must be strings for JSON compatibility + if isinstance(result, (int, float)): + result = str(result) + return result class ObjectElemKeyExpressionRule(LarkRule): diff --git a/hcl2/transformer.py b/hcl2/transformer.py index 07230fe5..7de4f7e1 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -108,6 +108,9 @@ def block(self, meta: Meta, args) -> BlockRule: @v_args(meta=True) def attribute(self, meta: Meta, args) -> AttributeRule: + # _attribute_name is flattened, so args[0] may be KeywordRule or IdentifierRule + if isinstance(args[0], KeywordRule): + args[0] = IdentifierRule([NAME(args[0].token.value)], meta) return AttributeRule(args, meta) @v_args(meta=True) diff --git a/test/helpers/terraform-config/test_floats.tf b/test/integration/hcl2_original/floats.tf similarity index 100% rename from test/helpers/terraform-config/test_floats.tf rename to test/integration/hcl2_original/floats.tf diff --git a/test/helpers/terraform-config/nulls.tf b/test/integration/hcl2_original/nulls.tf similarity index 100% rename from test/helpers/terraform-config/nulls.tf rename to test/integration/hcl2_original/nulls.tf diff --git a/test/integration/hcl2_original/object_keys.tf b/test/integration/hcl2_original/object_keys.tf new file mode 100644 index 00000000..913d5a42 --- /dev/null +++ b/test/integration/hcl2_original/object_keys.tf @@ -0,0 +1,8 @@ +bar = { + 0: 0, + "foo": 1 + baz : 2, + (var.account) : 3 + (format("key_prefix_%s", local.foo)) : 4 + "prefix_${var.account}:${var.user}_suffix": 5, +} diff --git 
a/test/integration/hcl2_original/resource_keyword_attribute.tf b/test/integration/hcl2_original/resource_keyword_attribute.tf new file mode 100644 index 00000000..fca27d75 --- /dev/null +++ b/test/integration/hcl2_original/resource_keyword_attribute.tf @@ -0,0 +1,8 @@ +resource "custom_provider_resource" "resource_name" { + name = "resource_name" + attribute = "attribute_value" + if = "attribute_value2" + in = "attribute_value3" + for = "attribute_value4" + for_each = "attribute_value5" +} diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index d741a6ac..99537532 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -43,17 +43,6 @@ block label1 label2 { } } -block label1 label3 { - simple_interpolation = "prefix:${var}-suffix" - embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" - deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" - escaped_interpolation = "prefix:$${aws:username}-suffix" - simple_and_escaped = "${"bar"}$${baz:bat}" - simple_and_escaped_reversed = "$${baz:bat}${"bar"}" - nested_escaped = "bar-${"$${baz:bat}"}" -} - - block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : diff --git a/test/helpers/terraform-config/string_interpolations.tf b/test/integration/hcl2_original/string_interpolations.tf similarity index 68% rename from test/helpers/terraform-config/string_interpolations.tf rename to test/integration/hcl2_original/string_interpolations.tf index 582b4aac..f9ac4e18 100644 --- a/test/helpers/terraform-config/string_interpolations.tf +++ b/test/integration/hcl2_original/string_interpolations.tf @@ -1,6 +1,6 @@ -locals { - simple_interpolation = "prefix:${var.foo}-suffix" - embedded_interpolation = "(long substring without interpolation); 
${module.special_constants.aws_accounts["aaa-${local.foo}-${local.bar}"]}/us-west-2/key_foo" +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" escaped_interpolation = "prefix:$${aws:username}-suffix" simple_and_escaped = "${"bar"}$${baz:bat}" diff --git a/test/helpers/terraform-config/unicode_strings.tf b/test/integration/hcl2_original/unicode_strings.tf similarity index 100% rename from test/helpers/terraform-config/unicode_strings.tf rename to test/integration/hcl2_original/unicode_strings.tf diff --git a/test/integration/hcl2_reconstructed/floats.tf b/test/integration/hcl2_reconstructed/floats.tf new file mode 100644 index 00000000..810108b2 --- /dev/null +++ b/test/integration/hcl2_reconstructed/floats.tf @@ -0,0 +1,26 @@ +locals { + simple_float = 123.456 + small_float = 0.123 + large_float = 9876543.21 + negative_float = -42.5 + negative_small = -0.001 + scientific_positive = 123000.0 + scientific_negative = 0.00987 + scientific_large = 6.022e+23 + integer_as_float = 100.0 + float_calculation = 10500.0 * 3.0 / 2.1 + float_comparison = 50.0 > 2.3 ? 1.0 : 0.0 + float_list = [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0, + ] + float_object = { + pi = 3.14159, + euler = 2.71828, + sqrt2 = 1.41421, + scientific = -12300.0, + } +} diff --git a/test/integration/hcl2_reconstructed/nulls.tf b/test/integration/hcl2_reconstructed/nulls.tf new file mode 100644 index 00000000..1e487789 --- /dev/null +++ b/test/integration/hcl2_reconstructed/nulls.tf @@ -0,0 +1,11 @@ +terraform = { + unary = !null, + binary = (a == null), + tuple = [ + null, + 1, + 2, + ], + single = null, + conditional = null ? 
null : null, +} diff --git a/test/integration/hcl2_reconstructed/object_keys.tf b/test/integration/hcl2_reconstructed/object_keys.tf new file mode 100644 index 00000000..497e65a6 --- /dev/null +++ b/test/integration/hcl2_reconstructed/object_keys.tf @@ -0,0 +1,8 @@ +bar = { + 0 = 0, + "foo" = 1, + baz = 2, + (var.account) = 3, + (format("key_prefix_%s", local.foo)) = 4, + "prefix_${var.account}:${var.user}_suffix" = 5, +} diff --git a/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf new file mode 100644 index 00000000..498777e0 --- /dev/null +++ b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf @@ -0,0 +1,8 @@ +resource"custom_provider_resource""resource_name" { + name = "resource_name" + attribute = "attribute_value" + if = "attribute_value2" + in = "attribute_value3" + for = "attribute_value4" + for_each = "attribute_value5" +} diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 8f17d6d6..b2de26f3 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -39,17 +39,6 @@ block label1 label2 { } -block label1 label3 { - simple_interpolation = "prefix:${var}-suffix" - embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" - deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" - escaped_interpolation = "prefix:$${aws:username}-suffix" - simple_and_escaped = "${"bar"}$${baz:bat}" - simple_and_escaped_reversed = "$${baz:bat}${"bar"}" - nested_escaped = "bar-${"$${baz:bat}"}" -} - - block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : diff --git a/test/integration/hcl2_reconstructed/string_interpolations.tf b/test/integration/hcl2_reconstructed/string_interpolations.tf new file mode 100644 index 
00000000..73df4715 --- /dev/null +++ b/test/integration/hcl2_reconstructed/string_interpolations.tf @@ -0,0 +1,9 @@ +block label1 label3 { + simple_interpolation = "prefix:${var}-suffix" + embedded_interpolation = "(long substring without interpolation); ${"aaa-${local}-${local}"}/us-west-2/key_foo" + deeply_nested_interpolation = "prefix1-${"prefix2-${"prefix3-$${foo:bar}"}"}" + escaped_interpolation = "prefix:$${aws:username}-suffix" + simple_and_escaped = "${"bar"}$${baz:bat}" + simple_and_escaped_reversed = "$${baz:bat}${"bar"}" + nested_escaped = "bar-${"$${baz:bat}"}" +} diff --git a/test/integration/hcl2_reconstructed/unicode_strings.tf b/test/integration/hcl2_reconstructed/unicode_strings.tf new file mode 100644 index 00000000..8c4df70e --- /dev/null +++ b/test/integration/hcl2_reconstructed/unicode_strings.tf @@ -0,0 +1,21 @@ +locals { + basic_unicode = "Hello, 世界! こんにちは Привет नमस्ते" + unicode_escapes = "© ♥ ♪ ☠ ☺" + emoji_string = "🚀 🌍 🔥 🎉" + rtl_text = "English and العربية text mixed" + complex_unicode = "Python (파이썬) es 很棒的! 
♥ αβγδ" + ascii = "ASCII: abc123" + emoji = "Emoji: 🚀🌍🔥🎉" + math = "Math: ∑∫√∞≠≤≥" + currency = "Currency: £€¥₹₽₩" + arrows = "Arrows: ←↑→↓↔↕" + cjk = "CJK: 你好世界안녕하세요こんにちは" + cyrillic = "Cyrillic: Привет мир" + special = "Special: ©®™§¶†‡" + mixed_content = <<-EOT + Line with interpolation: ${var.name} + Line with emoji: 👨‍👩‍👧‍👦 + Line with quotes: "quoted text" + Line with backslash: \escaped + EOT +} diff --git a/test/integration/json_reserialized/floats.json b/test/integration/json_reserialized/floats.json new file mode 100644 index 00000000..18078a18 --- /dev/null +++ b/test/integration/json_reserialized/floats.json @@ -0,0 +1,31 @@ +{ + "locals": [ + { + "simple_float": 123.456, + "small_float": 0.123, + "large_float": 9876543.21, + "negative_float": -42.5, + "negative_small": -0.001, + "scientific_positive": 123000.0, + "scientific_negative": 0.00987, + "scientific_large": 6.022e+23, + "integer_as_float": 100.0, + "float_calculation": "${10500.0 * 3.0 / 2.1}", + "float_comparison": "${50.0 > 2.3 ? 1.0 : 0.0}", + "float_list": [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0 + ], + "float_object": { + "pi": 3.14159, + "euler": 2.71828, + "sqrt2": 1.41421, + "scientific": -12300.0 + }, + "__is_block__": true + } + ] +} diff --git a/test/integration/json_reserialized/nulls.json b/test/integration/json_reserialized/nulls.json new file mode 100644 index 00000000..9cbdd755 --- /dev/null +++ b/test/integration/json_reserialized/nulls.json @@ -0,0 +1,13 @@ +{ + "terraform": { + "unary": "${!null}", + "binary": "${(a == null)}", + "tuple": [ + "null", + 1, + 2 + ], + "single": "null", + "conditional": "${null ? 
null : null}" + } +} diff --git a/test/integration/json_reserialized/object_keys.json b/test/integration/json_reserialized/object_keys.json new file mode 100644 index 00000000..8acccdea --- /dev/null +++ b/test/integration/json_reserialized/object_keys.json @@ -0,0 +1,10 @@ +{ + "bar": { + "0": 0, + "\"foo\"": 1, + "baz": 2, + "${(var.account)}": 3, + "${(format(\"key_prefix_%s\", local.foo))}": 4, + "\"prefix_${var.account}:${var.user}_suffix\"": 5 + } +} diff --git a/test/integration/json_reserialized/resource_keyword_attribute.json b/test/integration/json_reserialized/resource_keyword_attribute.json new file mode 100644 index 00000000..6826a0b8 --- /dev/null +++ b/test/integration/json_reserialized/resource_keyword_attribute.json @@ -0,0 +1,17 @@ +{ + "resource": [ + { + "\"custom_provider_resource\"": { + "\"resource_name\"": { + "name": "\"resource_name\"", + "attribute": "\"attribute_value\"", + "if": "\"attribute_value2\"", + "in": "\"attribute_value3\"", + "for": "\"attribute_value4\"", + "for_each": "\"attribute_value5\"", + "__is_block__": true + } + } + } + ] +} diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index 48544f85..670c5be3 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -48,20 +48,6 @@ } } }, - { - "label1": { - "label3": { - "simple_interpolation": "\"prefix:${var}-suffix\"", - "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", - "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", - "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", - "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", - "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", - "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", - "__is_block__": true - } - } - }, { "route53_forwarding_rule_shares": "${{for forwarding_rule_key in 
keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", "__is_block__": true diff --git a/test/integration/json_reserialized/string_interpolations.json b/test/integration/json_reserialized/string_interpolations.json new file mode 100644 index 00000000..059fcfbf --- /dev/null +++ b/test/integration/json_reserialized/string_interpolations.json @@ -0,0 +1,18 @@ +{ + "block": [ + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + } + ] +} \ No newline at end of file diff --git a/test/integration/json_reserialized/unicode_strings.json b/test/integration/json_reserialized/unicode_strings.json new file mode 100644 index 00000000..5f8f0095 --- /dev/null +++ b/test/integration/json_reserialized/unicode_strings.json @@ -0,0 +1,21 @@ +{ + "locals": [ + { + "basic_unicode": "\"Hello, \u4e16\u754c! \u3053\u3093\u306b\u3061\u306f \u041f\u0440\u0438\u0432\u0435\u0442 \u0928\u092e\u0938\u094d\u0924\u0947\"", + "unicode_escapes": "\"\u00a9 \u2665 \u266a \u2620 \u263a\"", + "emoji_string": "\"\ud83d\ude80 \ud83c\udf0d \ud83d\udd25 \ud83c\udf89\"", + "rtl_text": "\"English and \u0627\u0644\u0639\u0631\u0628\u064a\u0629 text mixed\"", + "complex_unicode": "\"Python (\ud30c\uc774\uc36c) es \u5f88\u68d2\u7684! 
\u2665 \u03b1\u03b2\u03b3\u03b4\"", + "ascii": "\"ASCII: abc123\"", + "emoji": "\"Emoji: \ud83d\ude80\ud83c\udf0d\ud83d\udd25\ud83c\udf89\"", + "math": "\"Math: \u2211\u222b\u221a\u221e\u2260\u2264\u2265\"", + "currency": "\"Currency: \u00a3\u20ac\u00a5\u20b9\u20bd\u20a9\"", + "arrows": "\"Arrows: \u2190\u2191\u2192\u2193\u2194\u2195\"", + "cjk": "\"CJK: \u4f60\u597d\u4e16\u754c\uc548\ub155\ud558\uc138\uc694\u3053\u3093\u306b\u3061\u306f\"", + "cyrillic": "\"Cyrillic: \u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440\"", + "special": "\"Special: \u00a9\u00ae\u2122\u00a7\u00b6\u2020\u2021\"", + "mixed_content": "\"<<-EOT\n Line with interpolation: ${var.name}\n Line with emoji: \ud83d\udc68\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66\n Line with quotes: \"quoted text\"\n Line with backslash: \\escaped\n EOT\"", + "__is_block__": true + } + ] +} diff --git a/test/integration/json_serialized/floats.json b/test/integration/json_serialized/floats.json new file mode 100644 index 00000000..18078a18 --- /dev/null +++ b/test/integration/json_serialized/floats.json @@ -0,0 +1,31 @@ +{ + "locals": [ + { + "simple_float": 123.456, + "small_float": 0.123, + "large_float": 9876543.21, + "negative_float": -42.5, + "negative_small": -0.001, + "scientific_positive": 123000.0, + "scientific_negative": 0.00987, + "scientific_large": 6.022e+23, + "integer_as_float": 100.0, + "float_calculation": "${10500.0 * 3.0 / 2.1}", + "float_comparison": "${50.0 > 2.3 ? 
1.0 : 0.0}", + "float_list": [ + 1.1, + 2.2, + 3.3, + -4.4, + 550.0 + ], + "float_object": { + "pi": 3.14159, + "euler": 2.71828, + "sqrt2": 1.41421, + "scientific": -12300.0 + }, + "__is_block__": true + } + ] +} diff --git a/test/integration/json_serialized/nulls.json b/test/integration/json_serialized/nulls.json new file mode 100644 index 00000000..9cbdd755 --- /dev/null +++ b/test/integration/json_serialized/nulls.json @@ -0,0 +1,13 @@ +{ + "terraform": { + "unary": "${!null}", + "binary": "${(a == null)}", + "tuple": [ + "null", + 1, + 2 + ], + "single": "null", + "conditional": "${null ? null : null}" + } +} diff --git a/test/integration/json_serialized/object_keys.json b/test/integration/json_serialized/object_keys.json new file mode 100644 index 00000000..8acccdea --- /dev/null +++ b/test/integration/json_serialized/object_keys.json @@ -0,0 +1,10 @@ +{ + "bar": { + "0": 0, + "\"foo\"": 1, + "baz": 2, + "${(var.account)}": 3, + "${(format(\"key_prefix_%s\", local.foo))}": 4, + "\"prefix_${var.account}:${var.user}_suffix\"": 5 + } +} diff --git a/test/integration/json_serialized/resource_keyword_attribute.json b/test/integration/json_serialized/resource_keyword_attribute.json new file mode 100644 index 00000000..6826a0b8 --- /dev/null +++ b/test/integration/json_serialized/resource_keyword_attribute.json @@ -0,0 +1,17 @@ +{ + "resource": [ + { + "\"custom_provider_resource\"": { + "\"resource_name\"": { + "name": "\"resource_name\"", + "attribute": "\"attribute_value\"", + "if": "\"attribute_value2\"", + "in": "\"attribute_value3\"", + "for": "\"attribute_value4\"", + "for_each": "\"attribute_value5\"", + "__is_block__": true + } + } + } + ] +} diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index 48544f85..670c5be3 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -48,20 +48,6 @@ } } }, - { - "label1": { - "label3": { - "simple_interpolation": 
"\"prefix:${var}-suffix\"", - "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", - "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", - "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", - "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", - "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", - "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", - "__is_block__": true - } - } - }, { "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", "__is_block__": true diff --git a/test/integration/json_serialized/string_interpolations.json b/test/integration/json_serialized/string_interpolations.json new file mode 100644 index 00000000..059fcfbf --- /dev/null +++ b/test/integration/json_serialized/string_interpolations.json @@ -0,0 +1,18 @@ +{ + "block": [ + { + "label1": { + "label3": { + "simple_interpolation": "\"prefix:${var}-suffix\"", + "embedded_interpolation": "\"(long substring without interpolation); ${\"aaa-${local}-${local}\"}/us-west-2/key_foo\"", + "deeply_nested_interpolation": "\"prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}\"", + "escaped_interpolation": "\"prefix:$${aws:username}-suffix\"", + "simple_and_escaped": "\"${\"bar\"}$${baz:bat}\"", + "simple_and_escaped_reversed": "\"$${baz:bat}${\"bar\"}\"", + "nested_escaped": "\"bar-${\"$${baz:bat}\"}\"", + "__is_block__": true + } + } + } + ] +} \ No newline at end of file diff --git a/test/integration/json_serialized/unicode_strings.json b/test/integration/json_serialized/unicode_strings.json new file mode 100644 index 00000000..5f8f0095 --- 
/dev/null +++ b/test/integration/json_serialized/unicode_strings.json @@ -0,0 +1,21 @@ +{ + "locals": [ + { + "basic_unicode": "\"Hello, \u4e16\u754c! \u3053\u3093\u306b\u3061\u306f \u041f\u0440\u0438\u0432\u0435\u0442 \u0928\u092e\u0938\u094d\u0924\u0947\"", + "unicode_escapes": "\"\u00a9 \u2665 \u266a \u2620 \u263a\"", + "emoji_string": "\"\ud83d\ude80 \ud83c\udf0d \ud83d\udd25 \ud83c\udf89\"", + "rtl_text": "\"English and \u0627\u0644\u0639\u0631\u0628\u064a\u0629 text mixed\"", + "complex_unicode": "\"Python (\ud30c\uc774\uc36c) es \u5f88\u68d2\u7684! \u2665 \u03b1\u03b2\u03b3\u03b4\"", + "ascii": "\"ASCII: abc123\"", + "emoji": "\"Emoji: \ud83d\ude80\ud83c\udf0d\ud83d\udd25\ud83c\udf89\"", + "math": "\"Math: \u2211\u222b\u221a\u221e\u2260\u2264\u2265\"", + "currency": "\"Currency: \u00a3\u20ac\u00a5\u20b9\u20bd\u20a9\"", + "arrows": "\"Arrows: \u2190\u2191\u2192\u2193\u2194\u2195\"", + "cjk": "\"CJK: \u4f60\u597d\u4e16\u754c\uc548\ub155\ud558\uc138\uc694\u3053\u3093\u306b\u3061\u306f\"", + "cyrillic": "\"Cyrillic: \u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440\"", + "special": "\"Special: \u00a9\u00ae\u2122\u00a7\u00b6\u2020\u2021\"", + "mixed_content": "\"<<-EOT\n Line with interpolation: ${var.name}\n Line with emoji: \ud83d\udc68\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66\n Line with quotes: \"quoted text\"\n Line with backslash: \\escaped\n EOT\"", + "__is_block__": true + } + ] +} diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py index b49b3f38..5ae28df4 100644 --- a/test/unit/rules/test_containers.py +++ b/test/unit/rules/test_containers.py @@ -171,11 +171,11 @@ def test_serialize_identifier(self): def test_serialize_int_lit(self): rule = ObjectElemKeyRule([IntLitRule([IntLiteral("5")])]) - self.assertEqual(rule.serialize(), 5) + self.assertEqual(rule.serialize(), "5") def test_serialize_float_lit(self): rule = ObjectElemKeyRule([FloatLitRule([FloatLiteral("3.14")])]) - 
self.assertAlmostEqual(rule.serialize(), 3.14) + self.assertEqual(rule.serialize(), "3.14") def test_serialize_string(self): rule = ObjectElemKeyRule([_make_string_rule("k3")]) From 1ab1f0df96c356fc60e1d8152e9c3f9e784e6038 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 15:21:54 +0100 Subject: [PATCH 25/42] add unit tests for - hcl2/rules/base.py - hcl2/rules/for_expressions.py - hcl2/rules/functions.py add some related bugs --- hcl2/rules/tokens.py | 2 +- test/integration/hcl2_reconstructed/smoke.tf | 2 +- test/integration/json_reserialized/smoke.json | 2 +- test/integration/json_serialized/smoke.json | 2 +- test/unit/rules/test_base.py | 299 +++++++++++++++ test/unit/rules/test_for_expressions.py | 351 ++++++++++++++++++ test/unit/rules/test_functions.py | 165 ++++++++ 7 files changed, 819 insertions(+), 4 deletions(-) create mode 100644 test/unit/rules/test_base.py create mode 100644 test/unit/rules/test_for_expressions.py create mode 100644 test/unit/rules/test_functions.py diff --git a/hcl2/rules/tokens.py b/hcl2/rules/tokens.py index b02be66e..06d1611f 100644 --- a/hcl2/rules/tokens.py +++ b/hcl2/rules/tokens.py @@ -36,7 +36,7 @@ def serialize_conversion(self) -> Callable[[Any], str]: return str -class StaticStringToken(LarkToken): +class StaticStringToken(StringToken): classes_by_value = {} diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index b2de26f3..ae687bdd 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -28,7 +28,7 @@ block label1 label2 { f(a), provider::func::aa(), ] - j = func(a, b, c, d) + j = func(a, b, c, d ... 
) k = a.b.5 l = a.*.b m = a[*][c].a.*.1 diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index 670c5be3..718086ce 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -32,7 +32,7 @@ "${f(a)}", "${provider::func::aa()}" ], - "j": "${func(a, b, c, d)}", + "j": "${func(a, b, c, d ...)}", "k": "${a.b.5}", "l": "${a.*.b}", "m": "${a[*][c].a.*.1}", diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index 670c5be3..718086ce 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -32,7 +32,7 @@ "${f(a)}", "${provider::func::aa()}" ], - "j": "${func(a, b, c, d)}", + "j": "${func(a, b, c, d ...)}", "k": "${a.b.5}", "l": "${a.*.b}", "m": "${a[*][c].a.*.1}", diff --git a/test/unit/rules/test_base.py b/test/unit/rules/test_base.py new file mode 100644 index 00000000..cfb6d666 --- /dev/null +++ b/test/unit/rules/test_base.py @@ -0,0 +1,299 @@ +from unittest import TestCase + +from hcl2.const import IS_BLOCK +from hcl2.rules.base import AttributeRule, BodyRule, StartRule, BlockRule +from hcl2.rules.expressions import ExpressionRule, ExprTermRule +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.strings import StringRule, StringPartRule +from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE, DBLQUOTE, STRING_CHARS, NL_OR_COMMENT +from hcl2.rules.whitespace import NewLineOrCommentRule +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & helpers --- + + +class StubExpression(ExpressionRule): + """Minimal concrete ExpressionRule that serializes to a fixed value.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_identifier(name): + return 
IdentifierRule([NAME(name)]) + + +def _make_expr_term(value): + return ExprTermRule([StubExpression(value)]) + + +def _make_string_rule(text): + part = StringPartRule([STRING_CHARS(text)]) + return StringRule([DBLQUOTE(), part, DBLQUOTE()]) + + +def _make_nlc(text): + return NewLineOrCommentRule([NL_OR_COMMENT(text)]) + + +def _make_attribute(name, value): + return AttributeRule([_make_identifier(name), EQ(), _make_expr_term(value)]) + + +def _make_block(labels, body_children=None): + """Build a BlockRule with the given labels and body children. + + labels: list of IdentifierRule or StringRule instances + body_children: list of children for the body, or None for empty body + """ + body = BodyRule(body_children or []) + children = list(labels) + [LBRACE(), body, RBRACE()] + return BlockRule(children) + + +# --- AttributeRule tests --- + + +class TestAttributeRule(TestCase): + def test_lark_name(self): + self.assertEqual(AttributeRule.lark_name(), "attribute") + + def test_identifier_property(self): + ident = _make_identifier("name") + attr = AttributeRule([ident, EQ(), _make_expr_term("value")]) + self.assertIs(attr.identifier, ident) + + def test_expression_property(self): + expr_term = _make_expr_term("value") + attr = AttributeRule([_make_identifier("name"), EQ(), expr_term]) + self.assertIs(attr.expression, expr_term) + + def test_serialize(self): + attr = _make_attribute("name", "value") + self.assertEqual(attr.serialize(), {"name": "value"}) + + def test_serialize_int_value(self): + attr = _make_attribute("count", 42) + self.assertEqual(attr.serialize(), {"count": 42}) + + def test_serialize_expression_value(self): + attr = _make_attribute("expr", "${var.x}") + self.assertEqual(attr.serialize(), {"expr": "${var.x}"}) + + +# --- BodyRule tests --- + + +class TestBodyRule(TestCase): + def test_lark_name(self): + self.assertEqual(BodyRule.lark_name(), "body") + + def test_serialize_empty(self): + body = BodyRule([]) + self.assertEqual(body.serialize(), {}) + + 
def test_serialize_single_attribute(self): + body = BodyRule([_make_attribute("name", "value")]) + self.assertEqual(body.serialize(), {"name": "value"}) + + def test_serialize_multiple_attributes(self): + body = BodyRule([_make_attribute("a", 1), _make_attribute("b", 2)]) + self.assertEqual(body.serialize(), {"a": 1, "b": 2}) + + def test_serialize_single_block(self): + block = _make_block([_make_identifier("resource")]) + body = BodyRule([block]) + result = body.serialize() + self.assertIn("resource", result) + self.assertIsInstance(result["resource"], list) + self.assertEqual(len(result["resource"]), 1) + self.assertTrue(result["resource"][0][IS_BLOCK]) + + def test_serialize_multiple_blocks_same_type(self): + block1 = _make_block( + [_make_identifier("resource")], + [_make_attribute("name", "first")], + ) + block2 = _make_block( + [_make_identifier("resource")], + [_make_attribute("name", "second")], + ) + body = BodyRule([block1, block2]) + result = body.serialize() + self.assertEqual(len(result["resource"]), 2) + self.assertEqual(result["resource"][0]["name"], "first") + self.assertEqual(result["resource"][1]["name"], "second") + + def test_serialize_mixed_attributes_and_blocks(self): + attr = _make_attribute("version", "1.0") + block = _make_block([_make_identifier("provider")]) + body = BodyRule([attr, block]) + result = body.serialize() + self.assertEqual(result["version"], "1.0") + self.assertIn("provider", result) + self.assertIsInstance(result["provider"], list) + + def test_serialize_comments_collected(self): + nlc = _make_nlc("# a comment\n") + attr = _make_attribute("x", 1) + body = BodyRule([nlc, attr]) + result = body.serialize(options=SerializationOptions(with_comments=True)) + self.assertIn("__comments__", result) + + def test_serialize_comments_not_collected_without_option(self): + nlc = _make_nlc("# a comment\n") + attr = _make_attribute("x", 1) + body = BodyRule([nlc, attr]) + result = 
body.serialize(options=SerializationOptions(with_comments=False)) + self.assertNotIn("__comments__", result) + + def test_serialize_bare_newlines_not_collected_as_comments(self): + nlc = _make_nlc("\n") + attr = _make_attribute("x", 1) + body = BodyRule([nlc, attr]) + result = body.serialize(options=SerializationOptions(with_comments=True)) + self.assertNotIn("__comments__", result) + + def test_serialize_skips_newline_children(self): + nlc = _make_nlc("\n") + attr = _make_attribute("x", 1) + body = BodyRule([nlc, attr, nlc]) + result = body.serialize() + # NLC children should not appear as keys + keys = [k for k in result.keys() if not k.startswith("__")] + self.assertEqual(keys, ["x"]) + + +# --- StartRule tests --- + + +class TestStartRule(TestCase): + def test_lark_name(self): + self.assertEqual(StartRule.lark_name(), "start") + + def test_body_property(self): + body = BodyRule([]) + start = StartRule([body]) + self.assertIs(start.body, body) + + def test_serialize_delegates_to_body(self): + attr = _make_attribute("key", "val") + body = BodyRule([attr]) + start = StartRule([body]) + self.assertEqual(start.serialize(), body.serialize()) + + def test_serialize_empty_body(self): + start = StartRule([BodyRule([])]) + self.assertEqual(start.serialize(), {}) + + +# --- BlockRule tests --- + + +class TestBlockRule(TestCase): + def test_lark_name(self): + self.assertEqual(BlockRule.lark_name(), "block") + + def test_labels_property_single(self): + ident = _make_identifier("resource") + block = _make_block([ident]) + self.assertEqual(len(block.labels), 1) + self.assertIs(block.labels[0], ident) + + def test_labels_property_two(self): + i1 = _make_identifier("resource") + i2 = _make_identifier("aws_instance") + block = _make_block([i1, i2]) + self.assertEqual(len(block.labels), 2) + self.assertIs(block.labels[0], i1) + self.assertIs(block.labels[1], i2) + + def test_labels_property_three(self): + i1 = _make_identifier("resource") + i2 = _make_identifier("aws_instance") + 
s3 = _make_string_rule("example") + block = _make_block([i1, i2, s3]) + labels = block.labels + self.assertEqual(len(labels), 3) + self.assertIs(labels[0], i1) + self.assertIs(labels[1], i2) + self.assertIs(labels[2], s3) + + def test_body_property(self): + body = BodyRule([]) + ident = _make_identifier("resource") + block = BlockRule([ident, LBRACE(), body, RBRACE()]) + self.assertIs(block.body, body) + + def test_constructor_filters_tokens(self): + """LBRACE and RBRACE should not appear in labels or body.""" + ident = _make_identifier("resource") + body = BodyRule([]) + block = BlockRule([ident, LBRACE(), body, RBRACE()]) + # labels should only contain the identifier + self.assertEqual(len(block.labels), 1) + self.assertIs(block.labels[0], ident) + self.assertIs(block.body, body) + + def test_serialize_single_label_empty_body(self): + block = _make_block([_make_identifier("resource")]) + result = block.serialize() + self.assertEqual(result, {IS_BLOCK: True}) + + def test_serialize_single_label_with_body(self): + block = _make_block( + [_make_identifier("resource")], + [_make_attribute("name", "foo")], + ) + result = block.serialize() + self.assertEqual(result, {"name": "foo", IS_BLOCK: True}) + + def test_serialize_two_labels(self): + block = _make_block( + [_make_identifier("resource"), _make_identifier("aws_instance")], + [_make_attribute("ami", "abc")], + ) + result = block.serialize() + self.assertIn("aws_instance", result) + inner = result["aws_instance"] + self.assertEqual(inner, {"ami": "abc", IS_BLOCK: True}) + + def test_serialize_three_labels(self): + block = _make_block( + [ + _make_identifier("resource"), + _make_identifier("aws_instance"), + _make_string_rule("example"), + ], + [_make_attribute("ami", "abc")], + ) + result = block.serialize() + self.assertIn("aws_instance", result) + inner = result["aws_instance"] + self.assertIn('"example"', inner) + innermost = inner['"example"'] + self.assertEqual(innermost, {"ami": "abc", IS_BLOCK: True}) + + def 
test_serialize_explicit_blocks_false(self): + block = _make_block( + [_make_identifier("resource")], + [_make_attribute("name", "foo")], + ) + opts = SerializationOptions(explicit_blocks=False) + result = block.serialize(options=opts) + self.assertNotIn(IS_BLOCK, result) + self.assertEqual(result, {"name": "foo"}) + + def test_serialize_string_label(self): + block = _make_block( + [_make_identifier("resource"), _make_string_rule("my_label")], + [_make_attribute("x", 1)], + ) + result = block.serialize() + # StringRule serializes with quotes + self.assertIn('"my_label"', result) diff --git a/test/unit/rules/test_for_expressions.py b/test/unit/rules/test_for_expressions.py new file mode 100644 index 00000000..febec643 --- /dev/null +++ b/test/unit/rules/test_for_expressions.py @@ -0,0 +1,351 @@ +from unittest import TestCase + +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.for_expressions import ( + ForIntroRule, + ForCondRule, + ForTupleExprRule, + ForObjectExprRule, +) +from hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import ( + NAME, + LSQB, + RSQB, + LBRACE, + RBRACE, + FOR, + IN, + IF, + COMMA, + COLON, + ELLIPSIS, + FOR_OBJECT_ARROW, +) +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & helpers --- + + +class StubExpression(ExpressionRule): + """Minimal concrete ExpressionRule that serializes to a fixed string.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_identifier(name): + return IdentifierRule([NAME(name)]) + + +def _make_for_intro_single(iter_name, iterable_value): + """Build ForIntroRule with a single iterator: for iter_name in iterable :""" + return ForIntroRule([ + FOR(), + _make_identifier(iter_name), + IN(), + StubExpression(iterable_value), + COLON(), + ]) + + +def _make_for_intro_dual(iter1_name, 
iter2_name, iterable_value): + """Build ForIntroRule with dual iterators: for iter1, iter2 in iterable :""" + return ForIntroRule([ + FOR(), + _make_identifier(iter1_name), + COMMA(), + _make_identifier(iter2_name), + IN(), + StubExpression(iterable_value), + COLON(), + ]) + + +def _make_for_cond(value): + """Build ForCondRule: if """ + return ForCondRule([IF(), StubExpression(value)]) + + +# --- ForIntroRule tests --- + + +class TestForIntroRule(TestCase): + def test_lark_name(self): + self.assertEqual(ForIntroRule.lark_name(), "for_intro") + + def test_first_iterator_single(self): + ident = _make_identifier("v") + rule = ForIntroRule([FOR(), ident, IN(), StubExpression("items"), COLON()]) + self.assertIs(rule.first_iterator, ident) + + def test_first_iterator_dual(self): + i1 = _make_identifier("k") + i2 = _make_identifier("v") + rule = ForIntroRule([FOR(), i1, COMMA(), i2, IN(), StubExpression("items"), COLON()]) + self.assertIs(rule.first_iterator, i1) + + def test_second_iterator_none_when_single(self): + rule = _make_for_intro_single("v", "items") + self.assertIsNone(rule.second_iterator) + + def test_second_iterator_present_when_dual(self): + i2 = _make_identifier("v") + rule = ForIntroRule([ + FOR(), + _make_identifier("k"), + COMMA(), + i2, + IN(), + StubExpression("items"), + COLON(), + ]) + self.assertIs(rule.second_iterator, i2) + + def test_iterable_property(self): + iterable = StubExpression("items") + rule = ForIntroRule([FOR(), _make_identifier("v"), IN(), iterable, COLON()]) + self.assertIs(rule.iterable, iterable) + + def test_serialize_single_iterator(self): + rule = _make_for_intro_single("v", "items") + self.assertEqual(rule.serialize(), "for v in items : ") + + def test_serialize_dual_iterator(self): + rule = _make_for_intro_dual("k", "v", "items") + self.assertEqual(rule.serialize(), "for k, v in items : ") + + def test_children_length(self): + rule = _make_for_intro_single("v", "items") + self.assertEqual(len(rule.children), 12) + + +# --- 
ForCondRule tests --- + + +class TestForCondRule(TestCase): + def test_lark_name(self): + self.assertEqual(ForCondRule.lark_name(), "for_cond") + + def test_condition_expr_property(self): + cond_expr = StubExpression("cond") + rule = ForCondRule([IF(), cond_expr]) + self.assertIs(rule.condition_expr, cond_expr) + + def test_serialize(self): + rule = _make_for_cond("cond") + self.assertEqual(rule.serialize(), "if cond") + + def test_children_length(self): + rule = _make_for_cond("cond") + self.assertEqual(len(rule.children), 3) + + +# --- ForTupleExprRule tests --- + + +class TestForTupleExprRule(TestCase): + def test_lark_name(self): + self.assertEqual(ForTupleExprRule.lark_name(), "for_tuple_expr") + + def test_for_intro_property(self): + intro = _make_for_intro_single("v", "items") + rule = ForTupleExprRule([LSQB(), intro, StubExpression("expr"), RSQB()]) + self.assertIs(rule.for_intro, intro) + + def test_value_expr_property(self): + value_expr = StubExpression("expr") + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + value_expr, + RSQB(), + ]) + self.assertIs(rule.value_expr, value_expr) + + def test_condition_none(self): + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ]) + self.assertIsNone(rule.condition) + + def test_condition_present(self): + cond = _make_for_cond("cond") + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + cond, + RSQB(), + ]) + self.assertIsInstance(rule.condition, ForCondRule) + self.assertIs(rule.condition, cond) + + def test_serialize_without_condition(self): + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ]) + self.assertEqual(rule.serialize(), "${[for v in items : expr]}") + + def test_serialize_with_condition(self): + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), 
+ _make_for_cond("cond"), + RSQB(), + ]) + self.assertEqual(rule.serialize(), "${[for v in items : expr if cond]}") + + def test_serialize_inside_dollar_string(self): + rule = ForTupleExprRule([ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ]) + ctx = SerializationContext(inside_dollar_string=True) + self.assertEqual(rule.serialize(context=ctx), "[for v in items : expr]") + + +# --- ForObjectExprRule tests --- + + +class TestForObjectExprRule(TestCase): + def test_lark_name(self): + self.assertEqual(ForObjectExprRule.lark_name(), "for_object_expr") + + def test_for_intro_property(self): + intro = _make_for_intro_dual("k", "v", "items") + rule = ForObjectExprRule([ + LBRACE(), + intro, + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ]) + self.assertIs(rule.for_intro, intro) + + def test_key_expr_property(self): + key_expr = StubExpression("key") + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + key_expr, + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ]) + self.assertIs(rule.key_expr, key_expr) + + def test_value_expr_property(self): + value_expr = StubExpression("value") + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + value_expr, + RBRACE(), + ]) + self.assertIs(rule.value_expr, value_expr) + + def test_ellipsis_none(self): + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ]) + self.assertIsNone(rule.ellipsis) + + def test_ellipsis_present(self): + ellipsis = ELLIPSIS() + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + ellipsis, + RBRACE(), + ]) + self.assertIs(rule.ellipsis, ellipsis) + + def test_condition_none(self): + 
rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ]) + self.assertIsNone(rule.condition) + + def test_condition_present(self): + cond = _make_for_cond("cond") + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + cond, + RBRACE(), + ]) + self.assertIsInstance(rule.condition, ForCondRule) + self.assertIs(rule.condition, cond) + + def test_serialize_basic(self): + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ]) + self.assertEqual(rule.serialize(), "${{for k, v in items : key => value}}") + + def test_serialize_with_ellipsis(self): + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + ELLIPSIS(), + RBRACE(), + ]) + result = rule.serialize() + self.assertIn("...", result) + self.assertEqual(result, "${{for k, v in items : key => value...}}") + + def test_serialize_with_condition(self): + rule = ForObjectExprRule([ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + _make_for_cond("cond"), + RBRACE(), + ]) + result = rule.serialize() + self.assertIn("if cond", result) + self.assertEqual(result, "${{for k, v in items : key => value if cond}}") diff --git a/test/unit/rules/test_functions.py b/test/unit/rules/test_functions.py new file mode 100644 index 00000000..188d5edd --- /dev/null +++ b/test/unit/rules/test_functions.py @@ -0,0 +1,165 @@ +from unittest import TestCase + +from hcl2.rules.expressions import ExpressionRule +from hcl2.rules.functions import ( + ArgumentsRule, + FunctionCallRule, + ProviderFunctionCallRule, +) +from 
hcl2.rules.literal_rules import IdentifierRule +from hcl2.rules.tokens import NAME, COMMA, ELLIPSIS, LPAR, RPAR +from hcl2.utils import SerializationOptions, SerializationContext + + +# --- Stubs & helpers --- + + +class StubExpression(ExpressionRule): + """Minimal concrete ExpressionRule that serializes to a fixed value.""" + + def __init__(self, value): + self._stub_value = value + super().__init__([], None) + + def serialize(self, options=SerializationOptions(), context=SerializationContext()): + return self._stub_value + + +def _make_identifier(name): + return IdentifierRule([NAME(name)]) + + +def _make_arguments(values, ellipsis=False): + """Build an ArgumentsRule from a list of stub values. + + values: list of serialization values for StubExpression args + ellipsis: if True, append an ELLIPSIS token + """ + children = [] + for i, val in enumerate(values): + if i > 0: + children.append(COMMA()) + children.append(StubExpression(val)) + if ellipsis: + children.append(ELLIPSIS()) + return ArgumentsRule(children) + + +def _make_function_call(func_names, arg_values=None, ellipsis=False): + """Build a FunctionCallRule. + + func_names: list of identifier name strings (e.g. 
["func"] or ["ns", "mod", "func"]) + arg_values: optional list of stub values for arguments + ellipsis: if True, pass ellipsis to arguments + """ + children = [_make_identifier(name) for name in func_names] + children.append(LPAR()) + if arg_values is not None: + children.append(_make_arguments(arg_values, ellipsis)) + children.append(RPAR()) + return FunctionCallRule(children) + + +# --- ArgumentsRule tests --- + + +class TestArgumentsRule(TestCase): + def test_lark_name(self): + self.assertEqual(ArgumentsRule.lark_name(), "arguments") + + def test_has_ellipsis_false(self): + rule = _make_arguments(["a"]) + self.assertFalse(rule.has_ellipsis) + + def test_has_ellipsis_true(self): + rule = _make_arguments(["a", "b"], ellipsis=True) + self.assertTrue(rule.has_ellipsis) + + def test_arguments_single(self): + rule = _make_arguments(["a"]) + self.assertEqual(len(rule.arguments), 1) + + def test_arguments_multiple(self): + rule = _make_arguments(["a", "b", "c"]) + self.assertEqual(len(rule.arguments), 3) + + def test_serialize_single_arg(self): + rule = _make_arguments(["a"]) + self.assertEqual(rule.serialize(), "a") + + def test_serialize_with_ellipsis(self): + rule = _make_arguments(["a", "b"], ellipsis=True) + self.assertEqual(rule.serialize(), "a, b ...") + + +# --- FunctionCallRule tests --- + + +class TestFunctionCallRule(TestCase): + def test_lark_name(self): + self.assertEqual(FunctionCallRule.lark_name(), "function_call") + + def test_identifiers_single(self): + rule = _make_function_call(["func"]) + self.assertEqual(len(rule.identifiers), 1) + + def test_identifiers_multiple(self): + rule = _make_function_call(["ns", "mod", "func"]) + self.assertEqual(len(rule.identifiers), 3) + + def test_arguments_property_present(self): + rule = _make_function_call(["func"], ["a"]) + self.assertIsInstance(rule.arguments, ArgumentsRule) + + def test_arguments_property_none(self): + rule = _make_function_call(["func"]) + self.assertIsNone(rule.arguments) + + def 
test_serialize_simple_no_args(self): + rule = _make_function_call(["func"]) + self.assertEqual(rule.serialize(), "${func()}") + + def test_serialize_simple_with_args(self): + rule = _make_function_call(["func"], ["a", "b"]) + self.assertEqual(rule.serialize(), "${func(a, b)}") + + def test_serialize_inside_dollar_string(self): + rule = _make_function_call(["func"], ["a"]) + ctx = SerializationContext(inside_dollar_string=True) + self.assertEqual(rule.serialize(context=ctx), "func(a)") + + +# --- ProviderFunctionCallRule tests --- + + +class TestProviderFunctionCallRule(TestCase): + def test_lark_name(self): + self.assertEqual(ProviderFunctionCallRule.lark_name(), "provider_function_call") + + def test_inherits_function_call_rule(self): + self.assertTrue(issubclass(ProviderFunctionCallRule, FunctionCallRule)) + + def test_serialize_provider_function(self): + children = [ + _make_identifier("ns"), + _make_identifier("mod"), + _make_identifier("func"), + LPAR(), + _make_arguments(["a"]), + RPAR(), + ] + rule = ProviderFunctionCallRule(children) + self.assertEqual(rule.serialize(), "${ns::mod::func(a)}") + + def test_serialize_inside_dollar_string(self): + children = [ + _make_identifier("ns"), + _make_identifier("mod"), + _make_identifier("func"), + LPAR(), + _make_arguments(["a"]), + RPAR(), + ] + rule = ProviderFunctionCallRule(children) + ctx = SerializationContext(inside_dollar_string=True) + self.assertEqual(rule.serialize(context=ctx), "ns::mod::func(a)") From 0a6b99684e5d338eba718592a2acc2d485c9539a Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 16:41:49 +0100 Subject: [PATCH 26/42] exclude abstract methods from test coverage report --- .coveragerc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.coveragerc b/.coveragerc index 3907df05..b40d5c58 100644 --- a/.coveragerc +++ b/.coveragerc @@ -10,3 +10,5 @@ omit = [report] show_missing = true fail_under = 90 +exclude_lines = + raise NotImplementedError From 
be1e4f13b999aa19ccdaab3b43758fed020693a2 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 16:43:33 +0100 Subject: [PATCH 27/42] fix scientific notation preservation, function argument lookup during serialization, and block label spacing in reconstruction --- hcl2/reconstructor.py | 6 +++- hcl2/rules/functions.py | 2 +- hcl2/rules/literal_rules.py | 14 +++++++- hcl2/utils.py | 1 + test/integration/hcl2_reconstructed/floats.tf | 12 +++---- .../resource_keyword_attribute.tf | 2 +- test/integration/hcl2_reconstructed/smoke.tf | 2 +- .../integration/json_reserialized/floats.json | 16 +++++----- test/integration/json_reserialized/smoke.json | 2 +- test/integration/json_serialized/floats.json | 16 +++++----- test/integration/json_serialized/smoke.json | 2 +- test/unit/rules/test_functions.py | 19 ++++++++++- test/unit/rules/test_literal_rules.py | 32 ++++++++++++++++++- 13 files changed, 95 insertions(+), 31 deletions(-) diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 1b5260ac..abfc21f6 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -156,6 +156,10 @@ def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[st result = [] rule_name = tree.data + # Check spacing BEFORE processing children, while _last_rule_name + # still reflects the previous sibling (not a child of this tree). 
+ needs_space = self._should_add_space_before(tree, parent_rule_name) + if rule_name == UnaryOpRule.lark_name(): for i, child in enumerate(tree.children): result.extend(self._reconstruct_node(child, rule_name)) @@ -183,7 +187,7 @@ def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[st for child in tree.children: result.extend(self._reconstruct_node(child, rule_name)) - if self._should_add_space_before(tree, parent_rule_name): + if needs_space: result.insert(0, " ") # Update state tracking diff --git a/hcl2/rules/functions.py b/hcl2/rules/functions.py index 380b959b..49b20f65 100644 --- a/hcl2/rules/functions.py +++ b/hcl2/rules/functions.py @@ -82,7 +82,7 @@ def identifiers(self) -> List[IdentifierRule]: @property @lru_cache(maxsize=None) def arguments(self) -> Optional[ArgumentsRule]: - for child in self._children[2:6]: + for child in self._children: if isinstance(child, ArgumentsRule): return child diff --git a/hcl2/rules/literal_rules.py b/hcl2/rules/literal_rules.py index 2e5b8281..cb8396b4 100644 --- a/hcl2/rules/literal_rules.py +++ b/hcl2/rules/literal_rules.py @@ -2,7 +2,7 @@ from typing import Any, Tuple from hcl2.rules.abstract import LarkRule, LarkToken -from hcl2.utils import SerializationOptions, SerializationContext +from hcl2.utils import SerializationOptions, SerializationContext, to_dollar_string class TokenRule(LarkRule, ABC): @@ -42,6 +42,18 @@ class FloatLitRule(TokenRule): def lark_name() -> str: return "float_lit" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + value = self.token.value + # Scientific notation (e.g. 1.23e5) cannot survive a Python float() + # round-trip, so preserve it as a ${...} expression string. 
+ if options.preserve_scientific_notation and isinstance(value, str) and "e" in value.lower(): + if context.inside_dollar_string: + return value + return to_dollar_string(value) + return self.token.serialize() + class BinaryOperatorRule(TokenRule): @staticmethod diff --git a/hcl2/utils.py b/hcl2/utils.py index 68c32ebc..b15dda8a 100644 --- a/hcl2/utils.py +++ b/hcl2/utils.py @@ -16,6 +16,7 @@ class SerializationOptions: explicit_blocks: bool = True preserve_heredocs: bool = True force_operation_parentheses: bool = False + preserve_scientific_notation: bool = True @dataclass diff --git a/test/integration/hcl2_reconstructed/floats.tf b/test/integration/hcl2_reconstructed/floats.tf index 810108b2..23dc46fe 100644 --- a/test/integration/hcl2_reconstructed/floats.tf +++ b/test/integration/hcl2_reconstructed/floats.tf @@ -4,23 +4,23 @@ locals { large_float = 9876543.21 negative_float = -42.5 negative_small = -0.001 - scientific_positive = 123000.0 - scientific_negative = 0.00987 + scientific_positive = 1.23e5 + scientific_negative = 9.87e-3 scientific_large = 6.022e+23 integer_as_float = 100.0 - float_calculation = 10500.0 * 3.0 / 2.1 - float_comparison = 50.0 > 2.3 ? 1.0 : 0.0 + float_calculation = 105e+2 * 3.0 / 2.1 + float_comparison = 5e1 > 2.3 ? 
1.0 : 0.0 float_list = [ 1.1, 2.2, 3.3, -4.4, - 550.0, + 5.5e2, ] float_object = { pi = 3.14159, euler = 2.71828, sqrt2 = 1.41421, - scientific = -12300.0, + scientific = -123e+2, } } diff --git a/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf index 498777e0..c9ada660 100644 --- a/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf +++ b/test/integration/hcl2_reconstructed/resource_keyword_attribute.tf @@ -1,4 +1,4 @@ -resource"custom_provider_resource""resource_name" { +resource "custom_provider_resource" "resource_name" { name = "resource_name" attribute = "attribute_value" if = "attribute_value2" diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index ae687bdd..40e2597d 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -26,7 +26,7 @@ block label1 label2 { 3, ], f(a), - provider::func::aa(), + provider::func::aa(5), ] j = func(a, b, c, d ... ) k = a.b.5 diff --git a/test/integration/json_reserialized/floats.json b/test/integration/json_reserialized/floats.json index 18078a18..8246516c 100644 --- a/test/integration/json_reserialized/floats.json +++ b/test/integration/json_reserialized/floats.json @@ -6,26 +6,26 @@ "large_float": 9876543.21, "negative_float": -42.5, "negative_small": -0.001, - "scientific_positive": 123000.0, - "scientific_negative": 0.00987, - "scientific_large": 6.022e+23, + "scientific_positive": "${1.23e5}", + "scientific_negative": "${9.87e-3}", + "scientific_large": "${6.022e+23}", "integer_as_float": 100.0, - "float_calculation": "${10500.0 * 3.0 / 2.1}", - "float_comparison": "${50.0 > 2.3 ? 1.0 : 0.0}", + "float_calculation": "${105e+2 * 3.0 / 2.1}", + "float_comparison": "${5e1 > 2.3 ? 
1.0 : 0.0}", "float_list": [ 1.1, 2.2, 3.3, -4.4, - 550.0 + "${5.5e2}" ], "float_object": { "pi": 3.14159, "euler": 2.71828, "sqrt2": 1.41421, - "scientific": -12300.0 + "scientific": "${-123e+2}" }, "__is_block__": true } ] -} +} \ No newline at end of file diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index 718086ce..5bcc702a 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -30,7 +30,7 @@ 3 ], "${f(a)}", - "${provider::func::aa()}" + "${provider::func::aa(5)}" ], "j": "${func(a, b, c, d ...)}", "k": "${a.b.5}", diff --git a/test/integration/json_serialized/floats.json b/test/integration/json_serialized/floats.json index 18078a18..8246516c 100644 --- a/test/integration/json_serialized/floats.json +++ b/test/integration/json_serialized/floats.json @@ -6,26 +6,26 @@ "large_float": 9876543.21, "negative_float": -42.5, "negative_small": -0.001, - "scientific_positive": 123000.0, - "scientific_negative": 0.00987, - "scientific_large": 6.022e+23, + "scientific_positive": "${1.23e5}", + "scientific_negative": "${9.87e-3}", + "scientific_large": "${6.022e+23}", "integer_as_float": 100.0, - "float_calculation": "${10500.0 * 3.0 / 2.1}", - "float_comparison": "${50.0 > 2.3 ? 1.0 : 0.0}", + "float_calculation": "${105e+2 * 3.0 / 2.1}", + "float_comparison": "${5e1 > 2.3 ? 
1.0 : 0.0}", "float_list": [ 1.1, 2.2, 3.3, -4.4, - 550.0 + "${5.5e2}" ], "float_object": { "pi": 3.14159, "euler": 2.71828, "sqrt2": 1.41421, - "scientific": -12300.0 + "scientific": "${-123e+2}" }, "__is_block__": true } ] -} +} \ No newline at end of file diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index 718086ce..5bcc702a 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -30,7 +30,7 @@ 3 ], "${f(a)}", - "${provider::func::aa()}" + "${provider::func::aa(5)}" ], "j": "${func(a, b, c, d ...)}", "k": "${a.b.5}", diff --git a/test/unit/rules/test_functions.py b/test/unit/rules/test_functions.py index 188d5edd..4c4b336e 100644 --- a/test/unit/rules/test_functions.py +++ b/test/unit/rules/test_functions.py @@ -7,7 +7,7 @@ ProviderFunctionCallRule, ) from hcl2.rules.literal_rules import IdentifierRule -from hcl2.rules.tokens import NAME, COMMA, ELLIPSIS, LPAR, RPAR +from hcl2.rules.tokens import NAME, COMMA, ELLIPSIS, LPAR, RPAR, StringToken from hcl2.utils import SerializationOptions, SerializationContext @@ -128,6 +128,23 @@ def test_serialize_inside_dollar_string(self): ctx = SerializationContext(inside_dollar_string=True) self.assertEqual(rule.serialize(context=ctx), "func(a)") + def test_arguments_with_colons_tokens(self): + """FunctionCallRule with COLONS tokens (provider syntax) should still find arguments.""" + COLONS = StringToken["COLONS"] + children = [ + _make_identifier("provider"), + COLONS("::"), + _make_identifier("func"), + COLONS("::"), + _make_identifier("aa"), + LPAR(), + _make_arguments([5]), + RPAR(), + ] + rule = FunctionCallRule(children) + self.assertIsNotNone(rule.arguments) + self.assertEqual(rule.serialize(), "${provider::func::aa(5)}") + # --- ProviderFunctionCallRule tests --- diff --git a/test/unit/rules/test_literal_rules.py b/test/unit/rules/test_literal_rules.py index f6b8b94c..f8513c21 100644 --- 
a/test/unit/rules/test_literal_rules.py +++ b/test/unit/rules/test_literal_rules.py @@ -1,7 +1,6 @@ from unittest import TestCase from hcl2.rules.literal_rules import ( - TokenRule, KeywordRule, IdentifierRule, IntLitRule, @@ -9,6 +8,7 @@ BinaryOperatorRule, ) from hcl2.rules.tokens import NAME, BINARY_OP, IntLiteral, FloatLiteral +from hcl2.utils import SerializationContext, SerializationOptions class TestKeywordRule(TestCase): @@ -60,6 +60,36 @@ def test_serialize_returns_float(self): self.assertAlmostEqual(result, 3.14) self.assertIsInstance(result, float) + def test_serialize_scientific_notation_as_dollar_string(self): + """Scientific notation is preserved as ${...} to survive dict round-trip.""" + rule = FloatLitRule([FloatLiteral("1.23e5")]) + self.assertEqual(rule.serialize(), "${1.23e5}") + + def test_serialize_scientific_negative_exponent(self): + rule = FloatLitRule([FloatLiteral("9.87e-3")]) + self.assertEqual(rule.serialize(), "${9.87e-3}") + + def test_serialize_scientific_inside_dollar_string(self): + """Inside a dollar string context, return raw value without wrapping.""" + rule = FloatLitRule([FloatLiteral("1.23e5")]) + ctx = SerializationContext(inside_dollar_string=True) + self.assertEqual(rule.serialize(context=ctx), "1.23e5") + + def test_serialize_regular_float_not_wrapped(self): + """Non-scientific floats should remain plain Python floats.""" + rule = FloatLitRule([FloatLiteral("123.456")]) + result = rule.serialize() + self.assertEqual(result, 123.456) + self.assertIsInstance(result, float) + + def test_serialize_scientific_disabled(self): + """With preserve_scientific_notation=False, returns plain float.""" + rule = FloatLitRule([FloatLiteral("1.23e5")]) + opts = SerializationOptions(preserve_scientific_notation=False) + result = rule.serialize(options=opts) + self.assertEqual(result, 123000.0) + self.assertIsInstance(result, float) + class TestBinaryOperatorRule(TestCase): def test_lark_name(self): From 
13ae15a99520541f14d175d3f0cb44433d41952c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 23 Feb 2026 18:54:34 +0100 Subject: [PATCH 28/42] more robust heredocs serialization, add option to deserialize strings into heredocs --- hcl2/deserializer.py | 19 ++++- hcl2/rules/strings.py | 19 +++-- test/integration/specialized/heredocs.tf | 34 +++++++++ .../specialized/heredocs_flattened.json | 14 ++++ .../specialized/heredocs_preserved.json | 14 ++++ .../specialized/heredocs_restored.tf | 20 +++++ test/integration/test_specialized.py | 76 +++++++++++++++++++ test/unit/rules/test_strings.py | 70 ++++++++++++++++- 8 files changed, 256 insertions(+), 10 deletions(-) create mode 100644 test/integration/specialized/heredocs.tf create mode 100644 test/integration/specialized/heredocs_flattened.json create mode 100644 test/integration/specialized/heredocs_preserved.json create mode 100644 test/integration/specialized/heredocs_restored.tf diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index a1f9733e..167d21f9 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -1,4 +1,5 @@ import json +import re from abc import ABC, abstractmethod from dataclasses import dataclass from functools import cached_property @@ -62,7 +63,7 @@ @dataclass class DeserializerOptions: heredocs_to_strings: bool = False - indent_length: int = 2 + strings_to_heredocs: bool = False object_elements_colon: bool = False object_elements_trailing_comma: bool = True @@ -156,6 +157,11 @@ def _deserialize_text(self, value: Any) -> LarkRule: if match: return self._deserialize_heredoc(value[1:-1], False) + if self.options.strings_to_heredocs: + inner = value[1:-1] + if '\\n' in inner: + return self._deserialize_string_as_heredoc(inner) + return self._deserialize_string(value) if self._is_expression(value): @@ -212,6 +218,17 @@ def _deserialize_heredoc( return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)]) return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) + def 
_deserialize_string_as_heredoc(self, inner: str) -> HeredocTemplateRule: + """Convert a quoted string with escaped newlines back into a heredoc.""" + # Single-pass unescape: \\n → \n, \\" → ", \\\\ → \ + content = re.sub( + r'\\(n|"|\\)', + lambda m: '\n' if m.group(1) == 'n' else m.group(1), + inner, + ) + heredoc = f"< ExprTermRule: """Deserialize an expression string into an ExprTermRule.""" # instead of processing expression manually and trying to recognize what kind of expression it is, diff --git a/hcl2/rules/strings.py b/hcl2/rules/strings.py index 248ab173..b757f317 100644 --- a/hcl2/rules/strings.py +++ b/hcl2/rules/strings.py @@ -101,8 +101,10 @@ def serialize( match = HEREDOC_PATTERN.match(heredoc) if not match: raise RuntimeError(f"Invalid Heredoc token: {heredoc}") - heredoc = match.group(2) - + heredoc = match.group(2).rstrip(self._trim_chars) + heredoc = heredoc.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n') + return f'"{heredoc}"' + result = heredoc.rstrip(self._trim_chars) return f'"{result}"' @@ -131,16 +133,21 @@ def serialize( raise RuntimeError(f"Invalid Heredoc token: {heredoc}") heredoc = match.group(2) - heredoc = heredoc.rstrip(self._trim_chars) + heredoc = heredoc.rstrip(self._trim_chars) lines = heredoc.split("\n") - + # calculate the min number of leading spaces in each line min_spaces = sys.maxsize for line in lines: leading_spaces = len(line) - len(line.lstrip(" ")) min_spaces = min(min_spaces, leading_spaces) - + # trim off that number of leading spaces from each line lines = [line[min_spaces:] for line in lines] - return '"' + "\n".join(lines) + '"' + + if not options.preserve_heredocs: + lines = [line.replace('\\', '\\\\').replace('"', '\\"') for line in lines] + + sep = "\\n" if not options.preserve_heredocs else "\n" + return '"' + sep.join(lines) + '"' \ No newline at end of file diff --git a/test/integration/specialized/heredocs.tf b/test/integration/specialized/heredocs.tf new file mode 100644 index 
00000000..9fc16498 --- /dev/null +++ b/test/integration/specialized/heredocs.tf @@ -0,0 +1,34 @@ +locals { + simple = < str: + """Deserialize a Python dict and reconstruct HCL text with custom options.""" + deserializer = BaseDeserializer(deserializer_options) + formatter = BaseFormatter() + reconstructor = HCLReconstructor() + deserialized = deserializer.load_python(serialized) + formatter.format_tree(deserialized) + lark_tree = deserialized.to_lark() + return reconstructor.reconstruct(lark_tree) + + +class TestHeredocs(TestCase): + """Test heredoc serialization, flattening, restoration, and round-trips. + + Scenarios: + 1. HCL with heredocs → JSON (preserve_heredocs=True) + 2. HCL with heredocs → JSON (preserve_heredocs=False, newlines escaped) + 3. Flattened JSON → HCL (strings_to_heredocs=True restores multiline) + 4. Full round-trip: flatten → restore → reparse → reflatten matches + """ + + maxDiff = None + _FLATTEN_OPTIONS = SerializationOptions(preserve_heredocs=False) + + def _load_special(self, name, suffix): + return (SPECIAL_DIR / f"{name}{suffix}").read_text() + + def test_parse_preserves_heredocs(self): + """HCL → JSON with default options preserves heredoc markers.""" + hcl_text = self._load_special("heredocs", ".tf") + actual = _parse_and_serialize(hcl_text) + expected = json.loads(self._load_special("heredocs_preserved", ".json")) + self.assertEqual(actual, expected) + + def test_parse_flattens_heredocs(self): + """HCL → JSON with preserve_heredocs=False escapes newlines in quoted strings.""" + hcl_text = self._load_special("heredocs", ".tf") + actual = _parse_and_serialize(hcl_text, options=self._FLATTEN_OPTIONS) + expected = json.loads(self._load_special("heredocs_flattened", ".json")) + self.assertEqual(actual, expected) + + def test_flattened_to_hcl_restores_heredocs(self): + """Flattened JSON → HCL with strings_to_heredocs=True restores multiline heredocs.""" + flattened = json.loads(self._load_special("heredocs_flattened", ".json")) + d_opts = 
DeserializerOptions(strings_to_heredocs=True) + actual = _deserialize_and_reconstruct_with_options(flattened, d_opts) + expected = self._load_special("heredocs_restored", ".tf") + self.assertMultiLineEqual(actual, expected) + + def test_flatten_restore_round_trip(self): + """Flatten → restore → reparse → reflatten produces identical flattened JSON.""" + hcl_text = self._load_special("heredocs", ".tf") + + # Forward: HCL → flattened JSON + flattened = _parse_and_serialize(hcl_text, options=self._FLATTEN_OPTIONS) + + # Restore: flattened JSON → HCL with heredocs + d_opts = DeserializerOptions(strings_to_heredocs=True) + restored_hcl = _deserialize_and_reconstruct_with_options(flattened, d_opts) + + # Reflatten: restored HCL → flattened JSON + reflattened = _parse_and_serialize(restored_hcl, options=self._FLATTEN_OPTIONS) + + self.assertEqual( + reflattened, + flattened, + "Flatten → restore → reflatten did not produce identical JSON", + ) diff --git a/test/unit/rules/test_strings.py b/test/unit/rules/test_strings.py index 67fec075..e7fb28f1 100644 --- a/test/unit/rules/test_strings.py +++ b/test/unit/rules/test_strings.py @@ -191,7 +191,43 @@ def test_serialize_no_preserve_multiline(self): rule = HeredocTemplateRule([token]) opts = SerializationOptions(preserve_heredocs=False) result = rule.serialize(opts) - self.assertEqual(result, '"line1\nline2"') + self.assertEqual(result, '"line1\\nline2"') + + def test_serialize_no_preserve_escapes_quotes(self): + token = HEREDOC_TEMPLATE('< Date: Mon, 2 Mar 2026 16:54:16 +0100 Subject: [PATCH 29/42] CLI rework --- .coveragerc | 3 +- cli/__init__.py | 0 cli/hcl_to_json.py | 118 ++++++++++++++ cli/helpers.py | 92 +++++++++++ cli/json_to_hcl.py | 124 +++++++++++++++ hcl2/__main__.py | 107 +------------ pyproject.toml | 5 +- test/unit/cli/__init__.py | 0 test/unit/cli/test_hcl_to_json.py | 254 ++++++++++++++++++++++++++++++ test/unit/cli/test_helpers.py | 174 ++++++++++++++++++++ test/unit/cli/test_json_to_hcl.py | 156 
++++++++++++++++++ 11 files changed, 925 insertions(+), 108 deletions(-) create mode 100644 cli/__init__.py create mode 100644 cli/hcl_to_json.py create mode 100644 cli/helpers.py create mode 100644 cli/json_to_hcl.py create mode 100644 test/unit/cli/__init__.py create mode 100644 test/unit/cli/test_hcl_to_json.py create mode 100644 test/unit/cli/test_helpers.py create mode 100644 test/unit/cli/test_json_to_hcl.py diff --git a/.coveragerc b/.coveragerc index b40d5c58..558bc244 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,11 +1,12 @@ [run] branch = true omit = - hcl2/__main__.py hcl2/lark_parser.py hcl2/version.py + hcl2/__main__.py hcl2/__init__.py hcl2/rules/__init__.py + cli/__init__.py [report] show_missing = true diff --git a/cli/__init__.py b/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cli/hcl_to_json.py b/cli/hcl_to_json.py new file mode 100644 index 00000000..faa9fb33 --- /dev/null +++ b/cli/hcl_to_json.py @@ -0,0 +1,118 @@ +"""``hcl2tojson`` CLI entry point — convert HCL2 files to JSON.""" +import argparse +import json +import os +from typing import IO + +from hcl2 import load +from hcl2.utils import SerializationOptions +from hcl2.version import __version__ +from .helpers import HCL_SKIPPABLE, _convert_single_file, _convert_directory, _convert_stdin + + +def _hcl_to_json( + in_file: IO, out_file: IO, options: SerializationOptions, json_indent: int = None, +) -> None: + data = load(in_file, serialization_options=options) + json.dump(data, out_file, indent=json_indent) + + +def main(): + """The ``hcl2tojson`` console_scripts entry point.""" + parser = argparse.ArgumentParser( + description="Convert HCL2 files to JSON", + ) + parser.add_argument( + "-s", dest="skip", action="store_true", help="Skip un-parsable files" + ) + parser.add_argument( + "PATH", + help='The file or directory to convert (use "-" for stdin)', + ) + parser.add_argument( + "OUT_PATH", + nargs="?", + help="The path to write output to. 
Optional for single file (defaults to stdout)", + ) + parser.add_argument("--version", action="version", version=__version__) + + # SerializationOptions flags + parser.add_argument( + "--with-meta", + action="store_true", + help="Add meta parameters like __start_line__ and __end_line__", + ) + parser.add_argument( + "--with-comments", + action="store_true", + help="Include comments in the output", + ) + parser.add_argument( + "--wrap-objects", + action="store_true", + help="Wrap object values as an inline HCL2", + ) + parser.add_argument( + "--wrap-tuples", + action="store_true", + help="Wrap tuple values an inline HCL2", + ) + parser.add_argument( + "--no-explicit-blocks", + action="store_true", + help="Disable explicit block markers", + ) + parser.add_argument( + "--no-preserve-heredocs", + action="store_true", + help="Convert heredocs to plain strings", + ) + parser.add_argument( + "--force-parens", + action="store_true", + help="Force parentheses around all operations", + ) + parser.add_argument( + "--no-preserve-scientific", + action="store_true", + help="Convert scientific notation to standard floats", + ) + + # JSON output formatting + parser.add_argument( + "--json-indent", + type=int, + default=2, + metavar="N", + help="JSON indentation width (default: 2)", + ) + + args = parser.parse_args() + + options = SerializationOptions( + with_meta=args.with_meta, + with_comments=args.with_comments, + wrap_objects=args.wrap_objects, + wrap_tuples=args.wrap_tuples, + explicit_blocks=not args.no_explicit_blocks, + preserve_heredocs=not args.no_preserve_heredocs, + force_operation_parentheses=args.force_parens, + preserve_scientific_notation=not args.no_preserve_scientific, + ) + json_indent = args.json_indent + + def convert(in_file, out_file): + _hcl_to_json(in_file, out_file, options, json_indent=json_indent) + out_file.write("\n") + + if args.PATH == "-": + _convert_stdin(convert) + elif os.path.isfile(args.PATH): + _convert_single_file(args.PATH, args.OUT_PATH, 
convert, args.skip, HCL_SKIPPABLE) + elif os.path.isdir(args.PATH): + _convert_directory( + args.PATH, args.OUT_PATH, convert, args.skip, HCL_SKIPPABLE, + in_extensions={".tf", ".hcl"}, out_extension=".json", + ) + else: + raise RuntimeError("Invalid Path", args.PATH) diff --git a/cli/helpers.py b/cli/helpers.py new file mode 100644 index 00000000..bef6ba6f --- /dev/null +++ b/cli/helpers.py @@ -0,0 +1,92 @@ +"""Shared file-conversion helpers for the HCL2 CLI commands.""" +import json +import os +import sys +from typing import Callable, IO, Set, Tuple + +from lark import UnexpectedCharacters, UnexpectedToken + +# Exceptions that can be skipped when -s is passed +HCL_SKIPPABLE = (UnexpectedToken, UnexpectedCharacters, UnicodeDecodeError) +JSON_SKIPPABLE = (json.JSONDecodeError, UnicodeDecodeError) + + +def _convert_single_file( + in_path: str, + out_path: str, + convert_fn: Callable[[IO, IO], None], + skip: bool, + skippable: Tuple[type, ...], +) -> None: + with open(in_path, "r", encoding="utf-8") as in_file: + print(in_path, file=sys.stderr, flush=True) + if out_path is not None: + try: + with open(out_path, "w", encoding="utf-8") as out_file: + convert_fn(in_file, out_file) + except skippable: + if skip: + return + raise + else: + try: + convert_fn(in_file, sys.stdout) + sys.stdout.write("\n") + except skippable: + if skip: + return + raise + + +def _convert_directory( + in_path: str, + out_path: str, + convert_fn: Callable[[IO, IO], None], + skip: bool, + skippable: Tuple[type, ...], + in_extensions: Set[str], + out_extension: str, +) -> None: + if out_path is None: + raise RuntimeError("Positional OUT_PATH parameter shouldn't be empty") + if not os.path.exists(out_path): + os.mkdir(out_path) + + processed_files: set = set() + for current_dir, _, files in os.walk(in_path): + dir_prefix = os.path.commonpath([in_path, current_dir]) + relative_current_dir = os.path.relpath(current_dir, dir_prefix) + current_out_path = os.path.normpath( + os.path.join(out_path, 
relative_current_dir) + ) + if not os.path.exists(current_out_path): + os.mkdir(current_out_path) + for file_name in files: + _, ext = os.path.splitext(file_name) + if ext not in in_extensions: + continue + + in_file_path = os.path.join(current_dir, file_name) + out_file_path = os.path.join(current_out_path, file_name) + out_file_path = os.path.splitext(out_file_path)[0] + out_extension + + if in_file_path in processed_files or out_file_path in processed_files: + continue + + processed_files.add(in_file_path) + processed_files.add(out_file_path) + + with open(in_file_path, "r", encoding="utf-8") as in_file: + print(in_file_path, file=sys.stderr, flush=True) + try: + with open(out_file_path, "w", encoding="utf-8") as out_file: + convert_fn(in_file, out_file) + except skippable: + if skip: + continue + raise + + +def _convert_stdin(convert_fn: Callable[[IO, IO], None]) -> None: + convert_fn(sys.stdin, sys.stdout) + sys.stdout.write("\n") diff --git a/cli/json_to_hcl.py b/cli/json_to_hcl.py new file mode 100644 index 00000000..48ade7c0 --- /dev/null +++ b/cli/json_to_hcl.py @@ -0,0 +1,124 @@ +"""``jsontohcl2`` CLI entry point — convert JSON files to HCL2.""" +import argparse +import json +import os +from typing import IO + +from hcl2 import dump +from hcl2.deserializer import DeserializerOptions +from hcl2.formatter import FormatterOptions +from hcl2.version import __version__ +from .helpers import JSON_SKIPPABLE, _convert_single_file, _convert_directory, _convert_stdin + + +def _json_to_hcl( + in_file: IO, + out_file: IO, + d_opts: DeserializerOptions, + f_opts: FormatterOptions, +) -> None: + data = json.load(in_file) + dump(data, out_file, deserializer_options=d_opts, formatter_options=f_opts) + + +def main(): + """The ``jsontohcl2`` console_scripts entry point.""" + parser = argparse.ArgumentParser( + description="Convert JSON files to HCL2", + ) + parser.add_argument( + "-s", dest="skip", action="store_true", help="Skip un-parsable files" + ) + 
parser.add_argument( + "PATH", + help='The file or directory to convert (use "-" for stdin)', + ) + parser.add_argument( + "OUT_PATH", + nargs="?", + help="The path to write output to. Optional for single file (defaults to stdout)", + ) + parser.add_argument("--version", action="version", version=__version__) + + # DeserializerOptions flags + parser.add_argument( + "--colon-separator", + action="store_true", + help="Use colons instead of equals in object elements", + ) + parser.add_argument( + "--no-trailing-comma", + action="store_true", + help="Omit trailing commas in object elements", + ) + parser.add_argument( + "--heredocs-to-strings", + action="store_true", + help="Convert heredocs to plain strings", + ) + parser.add_argument( + "--strings-to-heredocs", + action="store_true", + help="Convert strings containing escaped newlines to heredocs", + ) + + # FormatterOptions flags + parser.add_argument( + "--indent", + type=int, + default=2, + metavar="N", + help="Indentation width (default: 2)", + ) + parser.add_argument( + "--no-open-empty-blocks", + action="store_true", + help="Collapse empty blocks to a single line", + ) + parser.add_argument( + "--no-open-empty-objects", + action="store_true", + help="Collapse empty objects to a single line", + ) + parser.add_argument( + "--open-empty-tuples", + action="store_true", + help="Expand empty tuples across multiple lines", + ) + parser.add_argument( + "--no-align", + action="store_true", + help="Disable vertical alignment of attributes and object elements", + ) + + args = parser.parse_args() + + d_opts = DeserializerOptions( + object_elements_colon=args.colon_separator, + object_elements_trailing_comma=not args.no_trailing_comma, + heredocs_to_strings=args.heredocs_to_strings, + strings_to_heredocs=args.strings_to_heredocs, + ) + f_opts = FormatterOptions( + indent_length=args.indent, + open_empty_blocks=not args.no_open_empty_blocks, + open_empty_objects=not args.no_open_empty_objects, + 
open_empty_tuples=args.open_empty_tuples, + vertically_align_attributes=not args.no_align, + vertically_align_object_elements=not args.no_align, + ) + + def convert(in_file, out_file): + _json_to_hcl(in_file, out_file, d_opts, f_opts) + + if args.PATH == "-": + _convert_stdin(convert) + elif os.path.isfile(args.PATH): + _convert_single_file(args.PATH, args.OUT_PATH, convert, args.skip, JSON_SKIPPABLE) + elif os.path.isdir(args.PATH): + _convert_directory( + args.PATH, args.OUT_PATH, convert, args.skip, JSON_SKIPPABLE, + in_extensions={".json"}, out_extension=".tf", + ) + else: + raise RuntimeError("Invalid Path", args.PATH) diff --git a/hcl2/__main__.py b/hcl2/__main__.py index f1a58938..7431bb13 100644 --- a/hcl2/__main__.py +++ b/hcl2/__main__.py @@ -1,108 +1,5 @@ -#!/usr/bin/env python -""" -This script recursively converts hcl2 files to json - -Usage: - hcl2tojson [-s] PATH [OUT_PATH] - -Options: - -s Skip un-parsable files - PATH The path to convert - OUT_PATH The path to write files to - --with-meta If set add meta parameters to the output_json like __start_line__ and __end_line__ -""" -import argparse -import json -import os -import sys - -from lark import UnexpectedCharacters, UnexpectedToken - -from . import load -from .utils import SerializationOptions -from .version import __version__ - - -def main(): - """The `console_scripts` entry point""" - - parser = argparse.ArgumentParser( - description="This script recursively converts hcl2 files to json" - ) - parser.add_argument( - "-s", dest="skip", action="store_true", help="Skip un-parsable files" - ) - parser.add_argument("PATH", help="The file or directory to convert") - parser.add_argument( - "OUT_PATH", - nargs="?", - help="The path where to write files to. Optional when parsing a single file. 
" - "Output is printed to stdout if OUT_PATH is blank", - ) - parser.add_argument("--version", action="version", version=__version__) - parser.add_argument( - "--with-meta", - action="store_true", - help="If set add meta parameters to the output_json like __start_line__ and __end_line__", - ) - - args = parser.parse_args() - - skippable_exceptions = (UnexpectedToken, UnexpectedCharacters, UnicodeDecodeError) - - if os.path.isfile(args.PATH): - with open(args.PATH, "r", encoding="utf-8") as in_file: - # pylint: disable=R1732 - out_file = ( - sys.stdout - if args.OUT_PATH is None - else open(args.OUT_PATH, "w", encoding="utf-8") - ) - print(args.PATH, file=sys.stderr, flush=True) - options = SerializationOptions(with_meta=True) if args.with_meta else None - json.dump(load(in_file, serialization_options=options), out_file) - if args.OUT_PATH is None: - out_file.write("\n") - out_file.close() - elif os.path.isdir(args.PATH): - processed_files = set() - if args.OUT_PATH is None: - raise RuntimeError("Positional OUT_PATH parameter shouldn't be empty") - if not os.path.exists(args.OUT_PATH): - os.mkdir(args.OUT_PATH) - for current_dir, _, files in os.walk(args.PATH): - dir_prefix = os.path.commonpath([args.PATH, current_dir]) - relative_current_dir = os.path.relpath(current_dir, dir_prefix) - current_out_path = os.path.normpath( - os.path.join(args.OUT_PATH, relative_current_dir) - ) - if not os.path.exists(current_out_path): - os.mkdir(current_out_path) - for file_name in files: - in_file_path = os.path.join(current_dir, file_name) - out_file_path = os.path.join(current_out_path, file_name) - out_file_path = os.path.splitext(out_file_path)[0] + ".json" - - # skip any files that we already processed or generated to avoid loops and file lock errors - if in_file_path in processed_files or out_file_path in processed_files: - continue - - processed_files.add(in_file_path) - processed_files.add(out_file_path) - - with open(in_file_path, "r", encoding="utf-8") as in_file: - 
print(in_file_path, file=sys.stderr, flush=True) - try: - parsed_data = load(in_file) - except skippable_exceptions: - if args.skip: - continue - raise - with open(out_file_path, "w", encoding="utf-8") as out_file: - json.dump(parsed_data, out_file) - else: - raise RuntimeError("Invalid Path", args.PATH) - +"""Allow ``python -m hcl2`` to run the hcl2tojson command.""" +from cli.hcl_to_json import main if __name__ == "__main__": main() diff --git a/pyproject.toml b/pyproject.toml index 4440461a..0a9e0254 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,10 +40,11 @@ content-type = "text/markdown" Homepage = "https://github.com/amplify-education/python-hcl2" [project.scripts] -hcl2tojson = "hcl2.__main__:main" +hcl2tojson = "cli.hcl_to_json:main" +jsontohcl2 = "cli.json_to_hcl:main" [tool.setuptools] -packages = ["hcl2"] +packages = ["hcl2", "hcl2.rules", "cli"] zip-safe = false include-package-data = true diff --git a/test/unit/cli/__init__.py b/test/unit/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/unit/cli/test_hcl_to_json.py b/test/unit/cli/test_hcl_to_json.py new file mode 100644 index 00000000..67c8c48f --- /dev/null +++ b/test/unit/cli/test_hcl_to_json.py @@ -0,0 +1,254 @@ +import json +import os +import tempfile +from io import StringIO +from unittest import TestCase +from unittest.mock import patch + +from cli.hcl_to_json import main + + +SIMPLE_HCL = 'x = 1\n' +SIMPLE_JSON_DICT = {"x": 1} + + +def _write_file(path, content): + with open(path, "w", encoding="utf-8") as f: + f.write(content) + + +def _read_file(path): + with open(path, "r", encoding="utf-8") as f: + return f.read() + + +class TestHclToJson(TestCase): + + def test_single_file_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, SIMPLE_HCL) + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", hcl_path]): + with patch("sys.stdout", stdout): + main() + + result = 
json.loads(stdout.getvalue()) + self.assertEqual(result["x"], 1) + + def test_single_file_to_output(self): + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + out_path = os.path.join(tmpdir, "test.json") + _write_file(hcl_path, SIMPLE_HCL) + + with patch("sys.argv", ["hcl2tojson", hcl_path, out_path]): + main() + + result = json.loads(_read_file(out_path)) + self.assertEqual(result["x"], 1) + + def test_stdin(self): + stdout = StringIO() + stdin = StringIO(SIMPLE_HCL) + with patch("sys.argv", ["hcl2tojson", "-"]): + with patch("sys.stdin", stdin), patch("sys.stdout", stdout): + main() + + result = json.loads(stdout.getvalue()) + self.assertEqual(result["x"], 1) + + def test_directory_mode(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "a.tf"), SIMPLE_HCL) + _write_file(os.path.join(in_dir, "b.hcl"), SIMPLE_HCL) + _write_file(os.path.join(in_dir, "readme.txt"), "not hcl") + + with patch("sys.argv", ["hcl2tojson", in_dir, out_dir]): + main() + + self.assertTrue(os.path.exists(os.path.join(out_dir, "a.json"))) + self.assertTrue(os.path.exists(os.path.join(out_dir, "b.json"))) + self.assertFalse(os.path.exists(os.path.join(out_dir, "readme.json"))) + + result = json.loads(_read_file(os.path.join(out_dir, "a.json"))) + self.assertEqual(result["x"], 1) + + def test_with_meta_flag(self): + hcl_block = 'resource "a" "b" {\n x = 1\n}\n' + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, hcl_block) + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", "--with-meta", hcl_path]): + with patch("sys.stdout", stdout): + main() + + result = json.loads(stdout.getvalue()) + self.assertIn("resource", result) + + def test_no_comments_flag(self): + hcl_with_comment = '# a comment\nx = 1\n' + with 
tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, hcl_with_comment) + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", "--with-comments", hcl_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + self.assertIn("comment", output) + + def test_wrap_objects_flag(self): + hcl_input = 'x = {\n a = 1\n}\n' + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, hcl_input) + + stdout_default = StringIO() + stdout_wrapped = StringIO() + with patch("sys.argv", ["hcl2tojson", hcl_path]): + with patch("sys.stdout", stdout_default): + main() + with patch("sys.argv", ["hcl2tojson", "--wrap-objects", hcl_path]): + with patch("sys.stdout", stdout_wrapped): + main() + + default = json.loads(stdout_default.getvalue()) + wrapped = json.loads(stdout_wrapped.getvalue()) + self.assertNotEqual(default["x"], wrapped["x"]) + + def test_wrap_tuples_flag(self): + hcl_input = 'x = [1, 2]\n' + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, hcl_input) + + stdout_default = StringIO() + stdout_wrapped = StringIO() + with patch("sys.argv", ["hcl2tojson", hcl_path]): + with patch("sys.stdout", stdout_default): + main() + with patch("sys.argv", ["hcl2tojson", "--wrap-tuples", hcl_path]): + with patch("sys.stdout", stdout_wrapped): + main() + + default = json.loads(stdout_default.getvalue()) + wrapped = json.loads(stdout_wrapped.getvalue()) + self.assertNotEqual(default["x"], wrapped["x"]) + + def test_skip_flag(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "good.tf"), SIMPLE_HCL) + _write_file(os.path.join(in_dir, "bad.tf"), "this is {{{{ not valid hcl") + + with patch("sys.argv", ["hcl2tojson", "-s", in_dir, out_dir]): + 
main() + + self.assertTrue(os.path.exists(os.path.join(out_dir, "good.json"))) + + def test_directory_requires_out_path(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + os.mkdir(in_dir) + _write_file(os.path.join(in_dir, "a.tf"), SIMPLE_HCL) + + with patch("sys.argv", ["hcl2tojson", in_dir]): + with self.assertRaises(RuntimeError): + main() + + def test_invalid_path_raises_error(self): + with patch("sys.argv", ["hcl2tojson", "/nonexistent/path/foo.tf"]): + with self.assertRaises(RuntimeError): + main() + + +class TestSingleFileErrorHandling(TestCase): + + def test_skip_error_with_output_file(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.tf") + out_path = os.path.join(tmpdir, "out.json") + _write_file(in_path, "this is {{{{ not valid hcl") + + with patch("sys.argv", ["hcl2tojson", "-s", in_path, out_path]): + main() + + if os.path.exists(out_path): + self.assertEqual(_read_file(out_path), "") + + def test_raise_error_with_output_file(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.tf") + out_path = os.path.join(tmpdir, "out.json") + _write_file(in_path, "this is {{{{ not valid hcl") + + with patch("sys.argv", ["hcl2tojson", in_path, out_path]): + with self.assertRaises(Exception): + main() + + def test_skip_error_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.tf") + _write_file(in_path, "this is {{{{ not valid hcl") + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", "-s", in_path]): + with patch("sys.stdout", stdout): + main() + + self.assertEqual(stdout.getvalue(), "") + + def test_raise_error_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.tf") + _write_file(in_path, "this is {{{{ not valid hcl") + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", in_path]): + with patch("sys.stdout", 
stdout): + with self.assertRaises(Exception): + main() + + +class TestDirectoryEdgeCases(TestCase): + + def test_subdirectory_creation(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + sub_dir = os.path.join(in_dir, "sub") + out_dir = os.path.join(tmpdir, "output") + os.makedirs(sub_dir) + + _write_file(os.path.join(sub_dir, "nested.tf"), SIMPLE_HCL) + + with patch("sys.argv", ["hcl2tojson", in_dir, out_dir]): + main() + + self.assertTrue( + os.path.exists(os.path.join(out_dir, "sub", "nested.json")) + ) + + def test_directory_raise_error_without_skip(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "bad.tf"), "this is {{{{ not valid hcl") + + with patch("sys.argv", ["hcl2tojson", in_dir, out_dir]): + with self.assertRaises(Exception): + main() diff --git a/test/unit/cli/test_helpers.py b/test/unit/cli/test_helpers.py new file mode 100644 index 00000000..6859d0ab --- /dev/null +++ b/test/unit/cli/test_helpers.py @@ -0,0 +1,174 @@ +import os +import tempfile +from io import StringIO +from unittest import TestCase +from unittest.mock import patch + +from cli.helpers import _convert_single_file, _convert_directory, _convert_stdin + + +def _write_file(path, content): + with open(path, "w", encoding="utf-8") as f: + f.write(content) + + +class TestConvertSingleFile(TestCase): + + def test_does_not_close_stdout(self): + """Regression test: stdout must not be closed after writing.""" + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "test.txt") + _write_file(path, "hello") + + captured = StringIO() + + def convert(in_f, out_f): + out_f.write(in_f.read()) + + with patch("sys.stdout", captured): + _convert_single_file(path, None, convert, False, (Exception,)) + + self.assertFalse(captured.closed) + self.assertIn("hello", captured.getvalue()) + + def 
test_skip_error_with_output_file(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.txt") + out_path = os.path.join(tmpdir, "out.txt") + _write_file(in_path, "data") + + def convert(in_f, out_f): + raise ValueError("boom") + + _convert_single_file(in_path, out_path, convert, True, (ValueError,)) + + def test_raise_error_with_output_file(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.txt") + out_path = os.path.join(tmpdir, "out.txt") + _write_file(in_path, "data") + + def convert(in_f, out_f): + raise ValueError("boom") + + with self.assertRaises(ValueError): + _convert_single_file(in_path, out_path, convert, False, (ValueError,)) + + def test_skip_error_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.txt") + _write_file(in_path, "data") + + def convert(in_f, out_f): + raise ValueError("boom") + + stdout = StringIO() + with patch("sys.stdout", stdout): + _convert_single_file(in_path, None, convert, True, (ValueError,)) + + self.assertEqual(stdout.getvalue(), "") + + def test_raise_error_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_path = os.path.join(tmpdir, "test.txt") + _write_file(in_path, "data") + + def convert(in_f, out_f): + raise ValueError("boom") + + stdout = StringIO() + with patch("sys.stdout", stdout): + with self.assertRaises(ValueError): + _convert_single_file(in_path, None, convert, False, (ValueError,)) + + +class TestConvertDirectory(TestCase): + + def test_filters_by_extension(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "a.tf"), "content") + _write_file(os.path.join(in_dir, "b.txt"), "content") + + converted_files = [] + + def convert(in_f, out_f): + out_f.write(in_f.read()) + converted_files.append(True) + + _convert_directory( + 
in_dir, out_dir, convert, False, (Exception,), + in_extensions={".tf"}, out_extension=".json", + ) + + self.assertEqual(len(converted_files), 1) + self.assertTrue(os.path.exists(os.path.join(out_dir, "a.json"))) + self.assertFalse(os.path.exists(os.path.join(out_dir, "b.json"))) + + def test_requires_out_path(self): + with tempfile.TemporaryDirectory() as tmpdir: + with self.assertRaises(RuntimeError): + _convert_directory( + tmpdir, None, lambda i, o: None, False, (Exception,), + in_extensions={".tf"}, out_extension=".json", + ) + + def test_subdirectory_creation(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + sub_dir = os.path.join(in_dir, "sub") + out_dir = os.path.join(tmpdir, "output") + os.makedirs(sub_dir) + + _write_file(os.path.join(sub_dir, "nested.tf"), "content") + + def convert(in_f, out_f): + out_f.write(in_f.read()) + + _convert_directory( + in_dir, out_dir, convert, False, (Exception,), + in_extensions={".tf"}, out_extension=".json", + ) + + self.assertTrue( + os.path.exists(os.path.join(out_dir, "sub", "nested.json")) + ) + + def test_raise_error_without_skip(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "bad.tf"), "data") + + def convert(in_f, out_f): + raise ValueError("boom") + + with self.assertRaises(ValueError): + _convert_directory( + in_dir, out_dir, convert, False, (ValueError,), + in_extensions={".tf"}, out_extension=".json", + ) + + +class TestConvertStdin(TestCase): + + def test_stdin_forward(self): + stdout = StringIO() + captured = [] + + def convert(in_f, out_f): + data = in_f.read() + captured.append(data) + out_f.write("output") + + with patch("sys.stdin", StringIO("input")), \ + patch("sys.stdout", stdout): + _convert_stdin(convert) + + self.assertEqual(captured[0], "input") + self.assertIn("output", stdout.getvalue()) diff --git 
a/test/unit/cli/test_json_to_hcl.py b/test/unit/cli/test_json_to_hcl.py new file mode 100644 index 00000000..469d9188 --- /dev/null +++ b/test/unit/cli/test_json_to_hcl.py @@ -0,0 +1,156 @@ +import json +import os +import tempfile +from io import StringIO +from unittest import TestCase +from unittest.mock import patch + +from cli.json_to_hcl import main + + +SIMPLE_JSON_DICT = {"x": 1} +SIMPLE_JSON = json.dumps(SIMPLE_JSON_DICT) + +BLOCK_JSON_DICT = { + "resource": [ + { + "aws_instance": [ + { + "example": [ + {"ami": "abc-123"} + ] + } + ] + } + ] +} +BLOCK_JSON = json.dumps(BLOCK_JSON_DICT) + + +def _write_file(path, content): + with open(path, "w", encoding="utf-8") as f: + f.write(content) + + +def _read_file(path): + with open(path, "r", encoding="utf-8") as f: + return f.read() + + +class TestJsonToHcl(TestCase): + + def test_single_file_to_stdout(self): + with tempfile.TemporaryDirectory() as tmpdir: + json_path = os.path.join(tmpdir, "test.json") + _write_file(json_path, SIMPLE_JSON) + + stdout = StringIO() + with patch("sys.argv", ["jsontohcl2", json_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue().strip() + self.assertIn("x", output) + self.assertIn("1", output) + + def test_single_file_to_output(self): + with tempfile.TemporaryDirectory() as tmpdir: + json_path = os.path.join(tmpdir, "test.json") + out_path = os.path.join(tmpdir, "test.tf") + _write_file(json_path, SIMPLE_JSON) + + with patch("sys.argv", ["jsontohcl2", json_path, out_path]): + main() + + output = _read_file(out_path) + self.assertIn("x", output) + self.assertIn("1", output) + + def test_stdin(self): + stdout = StringIO() + stdin = StringIO(SIMPLE_JSON) + with patch("sys.argv", ["jsontohcl2", "-"]): + with patch("sys.stdin", stdin), patch("sys.stdout", stdout): + main() + + output = stdout.getvalue().strip() + self.assertIn("x", output) + self.assertIn("1", output) + + def test_directory_mode(self): + with tempfile.TemporaryDirectory() as tmpdir: + 
in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "a.json"), SIMPLE_JSON) + _write_file(os.path.join(in_dir, "readme.txt"), "not json") + + with patch("sys.argv", ["jsontohcl2", in_dir, out_dir]): + main() + + self.assertTrue(os.path.exists(os.path.join(out_dir, "a.tf"))) + self.assertFalse(os.path.exists(os.path.join(out_dir, "readme.tf"))) + + def test_indent_flag(self): + with tempfile.TemporaryDirectory() as tmpdir: + json_path = os.path.join(tmpdir, "test.json") + _write_file(json_path, BLOCK_JSON) + + stdout = StringIO() + with patch("sys.argv", ["jsontohcl2", "--indent", "4", json_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + self.assertIn(" ami", output) + + def test_no_align_flag(self): + hcl_json = json.dumps({"short": 1, "very_long_name": 2}) + with tempfile.TemporaryDirectory() as tmpdir: + json_path = os.path.join(tmpdir, "test.json") + _write_file(json_path, hcl_json) + + stdout = StringIO() + with patch("sys.argv", ["jsontohcl2", "--no-align", json_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + for line in output.strip().split("\n"): + line = line.strip() + if line.startswith("short"): + self.assertNotIn(" =", line) + + def test_colon_separator_flag(self): + hcl_json = json.dumps({"x": {"a": 1}}) + with tempfile.TemporaryDirectory() as tmpdir: + json_path = os.path.join(tmpdir, "test.json") + _write_file(json_path, hcl_json) + + stdout = StringIO() + with patch("sys.argv", ["jsontohcl2", "--colon-separator", json_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + self.assertIn(":", output) + + def test_skip_flag_on_invalid_json(self): + with tempfile.TemporaryDirectory() as tmpdir: + in_dir = os.path.join(tmpdir, "input") + out_dir = os.path.join(tmpdir, "output") + os.mkdir(in_dir) + + _write_file(os.path.join(in_dir, "good.json"), SIMPLE_JSON) + 
_write_file(os.path.join(in_dir, "bad.json"), "{not valid json") + + with patch("sys.argv", ["jsontohcl2", "-s", in_dir, out_dir]): + main() + + self.assertTrue(os.path.exists(os.path.join(out_dir, "good.tf"))) + + def test_invalid_path_raises_error(self): + with patch("sys.argv", ["jsontohcl2", "/nonexistent/path/foo.json"]): + with self.assertRaises(RuntimeError): + main() From 776a3f06128c62ae6e2ce3b342f7d4ebbfd2e5c3 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 2 Mar 2026 16:54:43 +0100 Subject: [PATCH 30/42] minor fixes --- hcl2/const.py | 3 +-- hcl2/deserializer.py | 10 +++++++--- test/integration/hcl2_original/smoke.tf | 2 +- test/integration/json_reserialized/floats.json | 2 +- .../json_reserialized/string_interpolations.json | 2 +- test/integration/json_serialized/floats.json | 2 +- .../json_serialized/string_interpolations.json | 2 +- test/integration/test_round_trip.py | 8 ++++---- 8 files changed, 17 insertions(+), 14 deletions(-) diff --git a/hcl2/const.py b/hcl2/const.py index 1bd4a4ce..c36a5321 100644 --- a/hcl2/const.py +++ b/hcl2/const.py @@ -1,5 +1,4 @@ """Module for various constants used across the library""" -START_LINE_KEY = "__start_line__" -END_LINE_KEY = "__end_line__" IS_BLOCK = "__is_block__" +COMMENTS_KEY = "__comments__" diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 167d21f9..5d308fb7 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -8,7 +8,7 @@ from regex import regex from hcl2.parser import parser as _get_parser -from hcl2.const import IS_BLOCK +from hcl2.const import IS_BLOCK, COMMENTS_KEY from hcl2.rules.abstract import LarkElement, LarkRule from hcl2.rules.base import ( BlockRule, @@ -129,7 +129,7 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: else: # otherwise it's just an attribute - if key != IS_BLOCK: + if not self._is_reserved_key(key): children.append(self._deserialize_attribute(key, val)) return children @@ -254,7 +254,7 @@ def _deserialize_block(self, 
first_label: str, value: dict) -> BlockRule: # Keep peeling off single-key layers until we hit the body (dict with IS_BLOCK) while isinstance(body, dict) and not body.get(IS_BLOCK): - non_block_keys = [k for k in body.keys() if k != IS_BLOCK] + non_block_keys = [k for k in body.keys() if not self._is_reserved_key(k)] if len(non_block_keys) == 1: # This is another label level label = non_block_keys[0] @@ -335,6 +335,10 @@ def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: return ObjectElemRule(result) + def _is_reserved_key(self, key: str) -> bool: + """Check if a key is a reserved metadata key that should be skipped during deserialization.""" + return key in (IS_BLOCK, COMMENTS_KEY) + def _is_expression(self, value: Any) -> bool: return isinstance(value, str) and value.startswith("${") and value.endswith("}") diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index 99537532..e2a0843b 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -37,7 +37,7 @@ block label1 label2 { k = a.b.5 l = a.*.b m = a[*][c].a.*.1 - + block b1 { a = 1 } diff --git a/test/integration/json_reserialized/floats.json b/test/integration/json_reserialized/floats.json index 8246516c..db301445 100644 --- a/test/integration/json_reserialized/floats.json +++ b/test/integration/json_reserialized/floats.json @@ -28,4 +28,4 @@ "__is_block__": true } ] -} \ No newline at end of file +} diff --git a/test/integration/json_reserialized/string_interpolations.json b/test/integration/json_reserialized/string_interpolations.json index 059fcfbf..f9df252c 100644 --- a/test/integration/json_reserialized/string_interpolations.json +++ b/test/integration/json_reserialized/string_interpolations.json @@ -15,4 +15,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/integration/json_serialized/floats.json b/test/integration/json_serialized/floats.json index 8246516c..db301445 100644 --- 
a/test/integration/json_serialized/floats.json +++ b/test/integration/json_serialized/floats.json @@ -28,4 +28,4 @@ "__is_block__": true } ] -} \ No newline at end of file +} diff --git a/test/integration/json_serialized/string_interpolations.json b/test/integration/json_serialized/string_interpolations.json index 059fcfbf..f9df252c 100644 --- a/test/integration/json_serialized/string_interpolations.json +++ b/test/integration/json_serialized/string_interpolations.json @@ -15,4 +15,4 @@ } } ] -} \ No newline at end of file +} diff --git a/test/integration/test_round_trip.py b/test/integration/test_round_trip.py index 3d2bbbb0..a963c4f8 100644 --- a/test/integration/test_round_trip.py +++ b/test/integration/test_round_trip.py @@ -115,7 +115,7 @@ def test_hcl_to_json(self): self.assertEqual( actual, expected, - f"HCL → JSON serialization mismatch for {suite}", + f"HCL → JSON serialization mismatch for suite {suite}", ) @@ -137,7 +137,7 @@ def test_json_reserialization(self): self.assertEqual( actual, expected, - f"JSON reserialization mismatch for {suite}", + f"JSON reserialization mismatch for suite {suite}", ) @@ -159,7 +159,7 @@ def test_json_to_hcl(self): self.assertMultiLineEqual( actual, expected, - f"HCL reconstruction mismatch for {suite}", + f"HCL reconstruction mismatch for suite {suite}", ) @@ -186,6 +186,6 @@ def test_full_round_trip(self): self.assertEqual( reserialized, serialized, - f"Full round-trip mismatch for {suite}: " + f"Full round-trip mismatch for suite {suite}: " f"HCL → JSON → HCL → JSON did not produce identical JSON", ) From 745b1c70183578b84a14c9d5420bc7b54318fbd8 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 2 Mar 2026 17:44:27 +0100 Subject: [PATCH 31/42] fixes to for/tuple expressions formatting; unit tests for deserializer and formatter --- hcl2/deserializer.py | 1 + hcl2/formatter.py | 32 +- test/integration/hcl2_reconstructed/smoke.tf | 4 +- test/unit/test_deserializer.py | 571 ++++++++++++++ test/unit/test_formatter.py | 
783 +++++++++++++++++++ 5 files changed, 1385 insertions(+), 6 deletions(-) create mode 100644 test/unit/test_deserializer.py create mode 100644 test/unit/test_formatter.py diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 5d308fb7..328427db 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -66,6 +66,7 @@ class DeserializerOptions: strings_to_heredocs: bool = False object_elements_colon: bool = False object_elements_trailing_comma: bool = True + # with_comments: bool = False # TODO class LarkElementTreeDeserializer(ABC): diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 35fb6b05..23302187 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -14,6 +14,8 @@ from hcl2.rules.for_expressions import ( ForTupleExprRule, ForObjectExprRule, + ForIntroRule, + ForCondRule, ) from hcl2.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA from hcl2.rules.whitespace import NewLineOrCommentRule @@ -161,10 +163,20 @@ def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = for child in expression.children: if isinstance(child, ExprTermRule): self.format_expression(child, indent_level + 1) + elif isinstance(child, (ForIntroRule, ForCondRule)): + for sub_child in child.children: + if isinstance(sub_child, ExprTermRule): + self.format_expression(sub_child, indent_level + 1) - indexes = [1, 3, 5, 7] - for index in indexes: + for index in [1, 3]: expression.children[index] = self._build_newline(indent_level) + + if expression.condition is not None: + expression.children[5] = self._build_newline(indent_level) + else: + expression.children[5] = None + + expression.children[7] = self._build_newline(indent_level) self._deindent_last_line() def format_forobjectexpr( @@ -173,11 +185,23 @@ def format_forobjectexpr( for child in expression.children: if isinstance(child, ExprTermRule): self.format_expression(child, indent_level + 1) + elif isinstance(child, (ForIntroRule, ForCondRule)): + for sub_child in child.children: + 
if isinstance(sub_child, ExprTermRule): + self.format_expression(sub_child, indent_level + 1) - indexes = [1, 3, 12] - for index in indexes: + for index in [1, 3]: expression.children[index] = self._build_newline(indent_level) + expression.children[6] = None + expression.children[8] = None + + if expression.condition is not None: + expression.children[10] = self._build_newline(indent_level) + else: + expression.children[10] = None + + expression.children[12] = self._build_newline(indent_level) self._deindent_last_line() def _vertically_align_attributes_in_body(self, body: BodyRule): diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 40e2597d..970cc1cf 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -46,8 +46,8 @@ block { aws_account_ids = [ for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] - ] - } ... if substr(bucket_name, 0, 1) == "l" + } ... 
+ if substr(bucket_name, 0, 1) == "l" } } diff --git a/test/unit/test_deserializer.py b/test/unit/test_deserializer.py new file mode 100644 index 00000000..908b1302 --- /dev/null +++ b/test/unit/test_deserializer.py @@ -0,0 +1,571 @@ +from unittest import TestCase + +from hcl2.const import IS_BLOCK, COMMENTS_KEY +from hcl2.deserializer import BaseDeserializer, DeserializerOptions +from hcl2.rules.base import StartRule, BodyRule, BlockRule, AttributeRule +from hcl2.rules.containers import ( + TupleRule, + ObjectRule, + ObjectElemRule, + ObjectElemKeyDotAccessor, + ObjectElemKeyExpressionRule, + ObjectElemKeyRule, +) +from hcl2.rules.expressions import ExprTermRule +from hcl2.rules.literal_rules import IdentifierRule, IntLitRule, FloatLitRule +from hcl2.rules.strings import ( + StringRule, + StringPartRule, + InterpolationRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, +) +from hcl2.rules.tokens import ( + STRING_CHARS, + ESCAPED_INTERPOLATION, + COMMA, + LSQB, + RSQB, + LBRACE, + RBRACE, + EQ, + COLON, +) + + +# --- helpers --- + + +def _deser(options=None): + return BaseDeserializer(options) + + +# --- DeserializerOptions tests --- + + +class TestDeserializerOptions(TestCase): + def test_defaults(self): + opts = DeserializerOptions() + self.assertFalse(opts.heredocs_to_strings) + self.assertFalse(opts.strings_to_heredocs) + self.assertFalse(opts.object_elements_colon) + self.assertTrue(opts.object_elements_trailing_comma) + + +# --- load_python top-level dispatch --- + + +class TestBaseDeserializerLoadPython(TestCase): + def test_dict_input_produces_start_with_body(self): + d = _deser() + result = d.load_python({"x": 1}) + self.assertIsInstance(result, StartRule) + self.assertIsInstance(result.body, BodyRule) + + def test_dict_body_contains_attribute(self): + d = _deser() + result = d.load_python({"x": 1}) + body = result.body + self.assertEqual(len(body.children), 1) + self.assertIsInstance(body.children[0], AttributeRule) + + def 
test_list_input_produces_start_wrapping_tuple(self): + d = _deser() + result = d.load_python([1, 2]) + self.assertIsInstance(result, StartRule) + # The child should be a TupleRule (via _deserialize) + child = result.children[0] + self.assertIsInstance(child, TupleRule) + + def test_scalar_string_input(self): + d = _deser() + result = d.load_python("hello") + self.assertIsInstance(result, StartRule) + child = result.children[0] + self.assertIsInstance(child, IdentifierRule) + self.assertEqual(child.token.value, "hello") + + def test_loads_parses_json(self): + d = _deser() + result = d.loads('{"key": 42}') + self.assertIsInstance(result, StartRule) + body = result.body + self.assertEqual(len(body.children), 1) + self.assertIsInstance(body.children[0], AttributeRule) + + +# --- _deserialize_text branches --- + + +class TestDeserializeText(TestCase): + def test_bool_true(self): + d = _deser() + result = d._deserialize_text(True) + self.assertIsInstance(result, IdentifierRule) + self.assertEqual(result.token.value, "true") + + def test_bool_false(self): + d = _deser() + result = d._deserialize_text(False) + self.assertIsInstance(result, IdentifierRule) + self.assertEqual(result.token.value, "false") + + def test_bool_before_int(self): + """bool is subclass of int; ensure True doesn't produce IntLitRule.""" + d = _deser() + result = d._deserialize_text(True) + self.assertNotIsInstance(result, IntLitRule) + self.assertIsInstance(result, IdentifierRule) + + def test_int_value(self): + d = _deser() + result = d._deserialize_text(42) + self.assertIsInstance(result, IntLitRule) + self.assertEqual(result.token.value, 42) + + def test_float_value(self): + d = _deser() + result = d._deserialize_text(3.14) + self.assertIsInstance(result, FloatLitRule) + self.assertEqual(result.token.value, 3.14) + + def test_quoted_string(self): + d = _deser() + result = d._deserialize_text('"hello"') + self.assertIsInstance(result, StringRule) + + def test_unquoted_string_identifier(self): + d = 
_deser() + result = d._deserialize_text("my_var") + self.assertIsInstance(result, IdentifierRule) + self.assertEqual(result.token.value, "my_var") + + def test_expression_string(self): + d = _deser() + result = d._deserialize_text("${var.x}") + self.assertIsInstance(result, ExprTermRule) + + def test_non_string_non_numeric_fallback(self): + """Non-string, non-numeric values get str()-converted to identifier.""" + d = _deser() + result = d._deserialize_text(None) + self.assertIsInstance(result, IdentifierRule) + self.assertEqual(result.token.value, "None") + + def test_zero_int(self): + d = _deser() + result = d._deserialize_text(0) + self.assertIsInstance(result, IntLitRule) + self.assertEqual(result.token.value, 0) + + def test_negative_float(self): + d = _deser() + result = d._deserialize_text(-1.5) + self.assertIsInstance(result, FloatLitRule) + self.assertEqual(result.token.value, -1.5) + + +# --- heredoc handling --- + + +class TestDeserializeHeredocs(TestCase): + def test_preserved_heredoc(self): + d = _deser() + result = d._deserialize_text('"< value_name}""" + children = [ + LBRACE(), + _make_for_intro(), + _make_expr_term(_make_identifier(key_name)), + FOR_OBJECT_ARROW(), + _make_expr_term(_make_identifier(value_name)), + ] + if ellipsis: + children.append(ELLIPSIS()) + if condition is not None: + children.append(condition) + children.append(RBRACE()) + return ForObjectExprRule(children) + + +# --- format_fortupleexpr --- + + +class TestFormatForTupleExpr(TestCase): + def test_basic_no_condition_no_spurious_newline(self): + """No condition → index 5 should be None, no spurious blank line.""" + f = _fmt() + expr = _make_for_tuple_expr() + f.format_fortupleexpr(expr, indent_level=1) + + self.assertIsNone(expr.children[5]) + for idx in [1, 3, 7]: + self.assertIsInstance(expr.children[idx], NewLineOrCommentRule) + + def test_basic_no_condition_deindents_closing(self): + """Last newline (before ]) should be deindented.""" + f = _fmt() + expr = 
_make_for_tuple_expr() + f.format_fortupleexpr(expr, indent_level=1) + + last_nl = expr.children[7] + self.assertEqual(_nlc_value(last_nl), "\n") + + def test_with_condition_newline_before_if(self): + """With condition → index 5 should be a newline before `if`.""" + f = _fmt() + cond = _make_for_cond() + expr = _make_for_tuple_expr(condition=cond) + f.format_fortupleexpr(expr, indent_level=1) + + self.assertIsInstance(expr.children[5], NewLineOrCommentRule) + for idx in [1, 3, 7]: + self.assertIsInstance(expr.children[idx], NewLineOrCommentRule) + + def test_with_condition_deindents_closing(self): + """Even with condition, last newline (before ]) is deindented.""" + f = _fmt() + cond = _make_for_cond() + expr = _make_for_tuple_expr(condition=cond) + f.format_fortupleexpr(expr, indent_level=1) + + last_nl = expr.children[7] + self.assertEqual(_nlc_value(last_nl), "\n") + + def test_nested_value_object_formatting(self): + """Value expression containing an object should be formatted recursively.""" + f = _fmt() + obj = _make_object([_make_object_elem("k", "v")]) + children = [ + LSQB(), + _make_for_intro(), + _make_expr_term(obj), + RSQB(), + ] + expr = ForTupleExprRule(children) + + f.format_fortupleexpr(expr, indent_level=1) + + nlc_count = sum(1 for c in obj._children if isinstance(c, NewLineOrCommentRule)) + self.assertGreater(nlc_count, 0) + + def test_for_intro_iterable_formatting(self): + """ForIntroRule's iterable expression should be formatted recursively.""" + f = _fmt() + obj = _make_object([_make_object_elem("k", "v")]) + intro = ForIntroRule([ + FOR(), + _make_identifier("item"), + IN(), + _make_expr_term(obj), + COLON(), + ]) + children = [LSQB(), intro, _make_expr_term(_make_identifier("val")), RSQB()] + expr = ForTupleExprRule(children) + + f.format_fortupleexpr(expr, indent_level=1) + + nlc_count = sum(1 for c in obj._children if isinstance(c, NewLineOrCommentRule)) + self.assertGreater(nlc_count, 0) + + +# --- format_forobjectexpr --- + + +class 
TestFormatForObjectExpr(TestCase): + def test_basic_no_condition_no_ellipsis(self): + """No condition, no ellipsis → indices 6, 8, 10 should be None.""" + f = _fmt() + expr = _make_for_object_expr() + f.format_forobjectexpr(expr, indent_level=1) + + self.assertIsNone(expr.children[6]) + self.assertIsNone(expr.children[8]) + self.assertIsNone(expr.children[10]) + for idx in [1, 3, 12]: + self.assertIsInstance(expr.children[idx], NewLineOrCommentRule) + + def test_basic_deindents_closing(self): + """Last newline (before }) should be deindented.""" + f = _fmt() + expr = _make_for_object_expr() + f.format_forobjectexpr(expr, indent_level=1) + + last_nl = expr.children[12] + self.assertEqual(_nlc_value(last_nl), "\n") + + def test_with_condition_newline_before_if(self): + """With condition → index 10 should be a newline before `if`.""" + f = _fmt() + cond = _make_for_cond() + expr = _make_for_object_expr(condition=cond) + f.format_forobjectexpr(expr, indent_level=1) + + self.assertIsInstance(expr.children[10], NewLineOrCommentRule) + self.assertIsNone(expr.children[6]) + self.assertIsNone(expr.children[8]) + + def test_with_condition_deindents_closing(self): + """Even with condition, last newline (before }) is deindented.""" + f = _fmt() + cond = _make_for_cond() + expr = _make_for_object_expr(condition=cond) + f.format_forobjectexpr(expr, indent_level=1) + + last_nl = expr.children[12] + self.assertEqual(_nlc_value(last_nl), "\n") + + def test_with_ellipsis_and_condition(self): + """With ellipsis and condition → index 10 is newline, 6/8 cleared.""" + f = _fmt() + cond = _make_for_cond() + expr = _make_for_object_expr(ellipsis=True, condition=cond) + f.format_forobjectexpr(expr, indent_level=1) + + self.assertIsInstance(expr.children[9], ELLIPSIS) + self.assertIsInstance(expr.children[10], NewLineOrCommentRule) + self.assertIsNone(expr.children[6]) + self.assertIsNone(expr.children[8]) + + def test_nested_value_tuple_formatting(self): + """Value expression containing a 
tuple should be formatted recursively.""" + f = _fmt() + inner_tup = _make_tuple([_make_expr_term(_make_identifier("a"))]) + children = [ + LBRACE(), + _make_for_intro(), + _make_expr_term(_make_identifier("k")), + FOR_OBJECT_ARROW(), + _make_expr_term(inner_tup), + RBRACE(), + ] + expr = ForObjectExprRule(children) + + f.format_forobjectexpr(expr, indent_level=1) + + nlc_count = sum(1 for c in inner_tup._children if isinstance(c, NewLineOrCommentRule)) + self.assertGreater(nlc_count, 0) + + def test_for_cond_expression_formatting(self): + """ForCondRule's condition expression should be formatted recursively.""" + f = _fmt() + obj = _make_object([_make_object_elem("k", "v")]) + cond = ForCondRule([IF(), _make_expr_term(obj)]) + expr = _make_for_object_expr(condition=cond) + + f.format_forobjectexpr(expr, indent_level=1) + + nlc_count = sum(1 for c in obj._children if isinstance(c, NewLineOrCommentRule)) + self.assertGreater(nlc_count, 0) From 648696e6c58405a61f5c9c41da83c8b01842ffb3 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 19:46:41 +0100 Subject: [PATCH 32/42] hcl2/rules - fix pre-commit errors --- hcl2/rules/abstract.py | 32 +++++++++++- hcl2/rules/base.py | 30 +++++++++-- hcl2/rules/containers.py | 60 ++++++++++++++++------ hcl2/rules/expressions.py | 59 ++++++++++++++++----- hcl2/rules/for_expressions.py | 71 ++++++++++++++++--------- hcl2/rules/functions.py | 37 +++++++++---- hcl2/rules/indexing.py | 82 +++++++++++++++++++++++------ hcl2/rules/literal_rules.py | 29 ++++++++++- hcl2/rules/strings.py | 60 +++++++++++++++------- hcl2/rules/tokens.py | 97 ++++++++++++++++++++++------------- hcl2/rules/whitespace.py | 23 +++++++-- 11 files changed, 433 insertions(+), 147 deletions(-) diff --git a/hcl2/rules/abstract.py b/hcl2/rules/abstract.py index 316c777a..26cda89c 100644 --- a/hcl2/rules/abstract.py +++ b/hcl2/rules/abstract.py @@ -1,3 +1,5 @@ +"""Abstract base classes for the LarkElement tree intermediate representation.""" + from abc 
import ABC, abstractmethod from typing import Any, Union, List, Optional, Callable @@ -8,33 +10,42 @@ class LarkElement(ABC): + """Base class for all elements in the LarkElement tree.""" + @staticmethod @abstractmethod def lark_name() -> str: + """Return the corresponding Lark grammar rule or token name.""" raise NotImplementedError() - def __init__(self, index: int = -1, parent: "LarkElement" = None): + def __init__(self, index: int = -1, parent: Optional["LarkElement"] = None): self._index = index self._parent = parent def set_index(self, i: int): + """Set the position index of this element within its parent.""" self._index = i def set_parent(self, node: "LarkElement"): + """Set the parent element that contains this element.""" self._parent = node @abstractmethod def to_lark(self) -> Any: + """Convert this element back to a Lark Tree or Token.""" raise NotImplementedError() @abstractmethod def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize this element to a Python object (dict, list, str, etc.).""" raise NotImplementedError() class LarkToken(LarkElement, ABC): + """Base class for terminal token elements (leaves of the tree).""" + def __init__(self, value: Union[str, int, float]): self._value = value super().__init__() @@ -42,21 +53,26 @@ def __init__(self, value: Union[str, int, float]): @property @abstractmethod def serialize_conversion(self) -> Callable: + """Return the callable used to convert this token's value during serialization.""" raise NotImplementedError() @property def value(self): + """Return the raw value of this token.""" return self._value def set_value(self, value: Any): + """Set the raw value of this token.""" self._value = value def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize this token using its serialize_conversion callable.""" return self.serialize_conversion(self.value) def to_lark(self) -> Token: + """Convert this token 
back to a Lark Token.""" return Token(self.lark_name(), self.value) def __str__(self) -> str: @@ -67,25 +83,39 @@ def __repr__(self) -> str: class LarkRule(LarkElement, ABC): + """Base class for non-terminal rule elements (internal nodes of the tree). + + Subclasses should declare `_children_layout: Tuple[...]` (without assignment) + to document the expected positional structure of `_children`. For variable-length + rules, use `_children_layout: List[Union[...]]`. This annotation exists only in + `__annotations__` and does not create an attribute or conflict with the runtime + `_children` list. + """ + @abstractmethod def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize this rule and its children to a Python object.""" raise NotImplementedError() @property def children(self) -> List[LarkElement]: + """Return the list of child elements.""" return self._children @property def parent(self): + """Return the parent element.""" return self._parent @property def index(self): + """Return the position index within the parent.""" return self._index def to_lark(self) -> Tree: + """Convert this rule and its children back to a Lark Tree.""" result_children = [] for child in self._children: if child is None: diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index a025949a..26a31247 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -1,3 +1,5 @@ +"""Rule classes for HCL2 structural elements (attributes, bodies, blocks).""" + from collections import defaultdict from typing import Tuple, Any, List, Union, Optional @@ -5,7 +7,7 @@ from hcl2.const import IS_BLOCK from hcl2.rules.abstract import LarkRule, LarkToken -from hcl2.rules.expressions import ExpressionRule, ExprTermRule +from hcl2.rules.expressions import ExprTermRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE @@ -15,7 +17,9 @@ class 
AttributeRule(LarkRule): - _children: Tuple[ + """Rule for key = value attribute assignments.""" + + _children_layout: Tuple[ IdentifierRule, EQ, ExprTermRule, @@ -23,25 +27,30 @@ class AttributeRule(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "attribute" @property def identifier(self) -> IdentifierRule: + """Return the attribute name identifier.""" return self._children[0] @property def expression(self) -> ExprTermRule: + """Return the attribute value expression.""" return self._children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a single-entry dict.""" return {self.identifier.serialize(options): self.expression.serialize(options)} class BodyRule(LarkRule): + """Rule for a body containing attributes, blocks, and comments.""" - _children: List[ + _children_layout: List[ Union[ NewLineOrCommentRule, AttributeRule, @@ -51,11 +60,13 @@ class BodyRule(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "body" def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a dict, grouping blocks under their type name.""" attribute_names = set() comments = [] inline_comments = [] @@ -92,26 +103,31 @@ def serialize( class StartRule(LarkRule): + """Rule for the top-level start rule of an HCL2 document.""" - _children: Tuple[BodyRule] + _children_layout: Tuple[BodyRule] @property def body(self) -> BodyRule: + """Return the document body.""" return self._children[0] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "start" def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize by delegating to the body.""" return self.body.serialize(options) class BlockRule(LarkRule): + """Rule for HCL2 blocks (e.g. resource 'type' 'name' { ... 
}).""" - _children: Tuple[ + _children_layout: Tuple[ IdentifierRule, Optional[Union[IdentifierRule, StringRule]], LBRACE, @@ -128,19 +144,23 @@ def __init__(self, children, meta: Optional[Meta] = None): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "block" @property def labels(self) -> List[NAME]: + """Return the block label chain (type name, optional string labels).""" return list(filter(lambda label: label is not None, self._labels)) @property def body(self) -> BodyRule: + """Return the block body.""" return self._body def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a nested dict with labels as keys.""" result = self._body.serialize(options) if options.explicit_blocks: result.update({IS_BLOCK: True}) diff --git a/hcl2/rules/containers.py b/hcl2/rules/containers.py index 3f590c5c..78e0bdeb 100644 --- a/hcl2/rules/containers.py +++ b/hcl2/rules/containers.py @@ -1,3 +1,5 @@ +"""Rule classes for HCL2 tuples, objects, and their elements.""" + from typing import Tuple, List, Optional, Union, Any from hcl2.rules.abstract import LarkRule @@ -32,8 +34,9 @@ class TupleRule(InlineCommentMixIn): + """Rule for tuple/array literals ([elem, ...]).""" - _children: Tuple[ + _children_layout: Tuple[ LSQB, Optional[NewLineOrCommentRule], Tuple[ @@ -52,10 +55,12 @@ class TupleRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "tuple" @property def elements(self) -> List[ExpressionRule]: + """Return the expression elements of the tuple.""" return [ child for child in self.children[1:-1] if isinstance(child, ExpressionRule) ] @@ -63,6 +68,7 @@ def elements(self) -> List[ExpressionRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a Python list or bracketed string.""" if not options.wrap_tuples and not context.inside_dollar_string: return 
[element.serialize(options, context) for element in self.elements] @@ -80,22 +86,26 @@ def serialize( class ObjectElemKeyRule(LarkRule): + """Rule for an object element key.""" key_T = Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule] - _children: Tuple[key_T] + _children_layout: Tuple[key_T] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "object_elem_key" @property def value(self) -> key_T: + """Return the key value (identifier, string, or number).""" return self._children[0] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize the key, coercing numbers to strings.""" result = self.value.serialize(options, context) # Object keys must be strings for JSON compatibility if isinstance(result, (int, float)): @@ -104,8 +114,9 @@ def serialize( class ObjectElemKeyExpressionRule(LarkRule): + """Rule for parenthesized expression keys in objects.""" - _children: Tuple[ + _children_layout: Tuple[ LPAR, ExpressionRule, RPAR, @@ -113,15 +124,18 @@ class ObjectElemKeyExpressionRule(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "object_elem_key_expression" @property def expression(self) -> ExpressionRule: + """Return the parenthesized key expression.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '(expression)' string.""" with context.modify(inside_dollar_string=True): result = f"({self.expression.serialize(options, context)})" if not context.inside_dollar_string: @@ -130,8 +144,9 @@ def serialize( class ObjectElemKeyDotAccessor(LarkRule): + """Rule for dot-accessor keys in objects (e.g. 
a.b.c).""" - _children: Tuple[ + _children_layout: Tuple[ IdentifierRule, Tuple[ IdentifierRule, @@ -141,23 +156,27 @@ class ObjectElemKeyDotAccessor(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "object_elem_key_dot_accessor" @property def identifiers(self) -> List[IdentifierRule]: + """Return the chain of identifiers.""" return [child for child in self._children if isinstance(child, IdentifierRule)] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to 'a.b.c' string.""" return ".".join( identifier.serialize(options, context) for identifier in self.identifiers ) class ObjectElemRule(LarkRule): + """Rule for a single key = value element in an object.""" - _children: Tuple[ + _children_layout: Tuple[ ObjectElemKeyRule, Union[EQ, COLON], ExpressionRule, @@ -165,19 +184,23 @@ class ObjectElemRule(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "object_elem" @property def key(self) -> ObjectElemKeyRule: + """Return the key rule.""" return self._children[0] @property def expression(self): + """Return the value expression.""" return self._children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a single-entry dict.""" return { self.key.serialize(options, context): self.expression.serialize( options, context @@ -186,8 +209,9 @@ def serialize( class ObjectRule(InlineCommentMixIn): + """Rule for object literals ({key = value, ...}).""" - _children: Tuple[ + _children_layout: Tuple[ LBRACE, Optional[NewLineOrCommentRule], Tuple[ @@ -201,10 +225,12 @@ class ObjectRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "object" @property def elements(self) -> List[ObjectElemRule]: + """Return the list of object element rules.""" return [ child for child in self.children[1:-1] if isinstance(child, 
ObjectElemRule) ] @@ -212,21 +238,23 @@ def elements(self) -> List[ObjectElemRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a Python dict or braced string.""" if not options.wrap_objects and not context.inside_dollar_string: - result = {} + dict_result: dict = {} for element in self.elements: - result.update(element.serialize(options, context)) - - return result + dict_result.update(element.serialize(options, context)) + return dict_result with context.modify(inside_dollar_string=True): - result = "{" - result += ", ".join( - f"{element.key.serialize(options, context)} = {element.expression.serialize(options,context)}" + str_result = "{" + str_result += ", ".join( + f"{element.key.serialize(options, context)}" + f" = " + f"{element.expression.serialize(options, context)}" for element in self.elements ) - result += "}" + str_result += "}" if not context.inside_dollar_string: - result = to_dollar_string(result) - return result + str_result = to_dollar_string(str_result) + return str_result diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py index 1e1d0cd8..e6aa1670 100644 --- a/hcl2/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -1,6 +1,7 @@ +"""Rule classes for HCL2 expressions, conditionals, and binary/unary operations.""" + from abc import ABC -from copy import deepcopy -from typing import Any, Tuple, Optional +from typing import Any, Optional, Tuple from lark.tree import Meta @@ -22,8 +23,11 @@ class ExpressionRule(InlineCommentMixIn, ABC): + """Base class for all HCL2 expression rules.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "expression" def __init__( @@ -33,8 +37,12 @@ def __init__( self._parentheses = parentheses def _wrap_into_parentheses( - self, value: str, options=SerializationOptions(), context=SerializationContext() + self, + value: str, + _options=SerializationOptions(), + context=SerializationContext(), ) -> 
str: + """Wrap value in parentheses if inside a nested expression.""" # do not wrap into parentheses if # 1. already wrapped or # 2. is top-level expression (unless explicitly wrapped) @@ -53,8 +61,9 @@ def _wrap_into_parentheses( class ExprTermRule(ExpressionRule): + """Rule for expression terms, optionally wrapped in parentheses.""" - type_ = Tuple[ + _children_layout: Tuple[ Optional[LPAR], Optional[NewLineOrCommentRule], ExpressionRule, @@ -62,10 +71,9 @@ class ExprTermRule(ExpressionRule): Optional[RPAR], ] - _children: type_ - @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -84,15 +92,18 @@ def __init__(self, children, meta: Optional[Meta] = None): @property def parentheses(self) -> bool: + """Return whether this term is wrapped in parentheses.""" return self._parentheses @property def expression(self) -> ExpressionRule: + """Return the inner expression.""" return self._children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize, handling parenthesized expression wrapping.""" with context.modify( inside_parentheses=self.parentheses or context.inside_parentheses ): @@ -107,8 +118,9 @@ def serialize( class ConditionalRule(ExpressionRule): + """Rule for ternary conditional expressions (condition ? 
true : false).""" - _children: Tuple[ + _children_layout: Tuple[ ExpressionRule, QMARK, Optional[NewLineOrCommentRule], @@ -121,6 +133,7 @@ class ConditionalRule(ExpressionRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "conditional" def __init__(self, children, meta: Optional[Meta] = None): @@ -129,19 +142,23 @@ def __init__(self, children, meta: Optional[Meta] = None): @property def condition(self) -> ExpressionRule: + """Return the condition expression.""" return self._children[0] @property def if_true(self) -> ExpressionRule: + """Return the true-branch expression.""" return self._children[3] @property def if_false(self) -> ExpressionRule: + """Return the false-branch expression.""" return self._children[7] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to ternary expression string.""" with context.modify(inside_dollar_string=True): result = ( f"{self.condition.serialize(options, context)} " @@ -159,8 +176,9 @@ def serialize( class BinaryTermRule(ExpressionRule): + """Rule for the operator+operand portion of a binary operation.""" - _children: Tuple[ + _children_layout: Tuple[ BinaryOperatorRule, Optional[NewLineOrCommentRule], ExprTermRule, @@ -168,6 +186,7 @@ class BinaryTermRule(ExpressionRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -176,20 +195,27 @@ def __init__(self, children, meta: Optional[Meta] = None): @property def binary_operator(self) -> BinaryOperatorRule: + """Return the binary operator.""" return self._children[0] @property def expr_term(self) -> ExprTermRule: + """Return the right-hand operand.""" return self._children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return f"{self.binary_operator.serialize(options, context)} {self.expr_term.serialize(options, context)}" + 
"""Serialize to 'operator operand' string.""" + op_str = self.binary_operator.serialize(options, context) + term_str = self.expr_term.serialize(options, context) + return f"{op_str} {term_str}" class BinaryOpRule(ExpressionRule): - _children: Tuple[ + """Rule for complete binary operations (lhs operator rhs).""" + + _children_layout: Tuple[ ExprTermRule, BinaryTermRule, Optional[NewLineOrCommentRule], @@ -197,20 +223,23 @@ class BinaryOpRule(ExpressionRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "binary_op" @property def expr_term(self) -> ExprTermRule: + """Return the left-hand operand.""" return self._children[0] @property def binary_term(self) -> BinaryTermRule: + """Return the binary term (operator + right-hand operand).""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - + """Serialize to 'lhs operator rhs' string.""" with context.modify(inside_dollar_string=True): lhs = self.expr_term.serialize(options, context) operator = self.binary_term.binary_operator.serialize(options, context) @@ -227,25 +256,29 @@ def serialize( class UnaryOpRule(ExpressionRule): + """Rule for unary operations (e.g. 
negation, logical not).""" - _children: Tuple[LarkToken, ExprTermRule] + _children_layout: Tuple[LarkToken, ExprTermRule] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "unary_op" @property def operator(self) -> str: + """Return the unary operator string.""" return str(self._children[0]) @property def expr_term(self): + """Return the operand.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - + """Serialize to 'operator operand' string.""" with context.modify(inside_dollar_string=True): result = f"{self.operator}{self.expr_term.serialize(options, context)}" diff --git a/hcl2/rules/for_expressions.py b/hcl2/rules/for_expressions.py index a1f24dcb..a062e66a 100644 --- a/hcl2/rules/for_expressions.py +++ b/hcl2/rules/for_expressions.py @@ -1,8 +1,9 @@ +"""Rule classes for HCL2 for-tuple and for-object expressions.""" + from typing import Any, Tuple, Optional, List from lark.tree import Meta -from hcl2.rules.abstract import LarkRule, LarkElement from hcl2.rules.expressions import ExpressionRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.tokens import ( @@ -17,6 +18,7 @@ COLON, ELLIPSIS, FOR_OBJECT_ARROW, + StaticStringToken, ) from hcl2.rules.whitespace import ( NewLineOrCommentRule, @@ -32,7 +34,7 @@ class ForIntroRule(InlineCommentMixIn): """Rule for the intro part of for expressions: 'for key, value in collection :'""" - _children: Tuple[ + _children_layout: Tuple[ FOR, Optional[NewLineOrCommentRule], IdentifierRule, @@ -49,6 +51,7 @@ class ForIntroRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "for_intro" def __init__(self, children, meta: Optional[Meta] = None): @@ -56,7 +59,10 @@ def __init__(self, children, meta: Optional[Meta] = None): self._insert_optionals(children) super().__init__(children, meta) - def _insert_optionals(self, children: List, indexes: List[int] = 
None): + def _insert_optionals( # type: ignore[override] + self, children: List, indexes: Optional[List[int]] = None + ): + """Insert None placeholders, handling optional comma and second identifier.""" identifiers = [child for child in children if isinstance(child, IdentifierRule)] second_identifier = identifiers[1] if len(identifiers) == 2 else None @@ -67,27 +73,28 @@ def _insert_optionals(self, children: List, indexes: List[int] = None): super()._insert_optionals(children, sorted(indexes)) if second_identifier is not None: - children[3] = COMMA() + children[3] = COMMA() # type: ignore[abstract] # pylint: disable=abstract-class-instantiated children[4] = second_identifier @property def first_iterator(self) -> IdentifierRule: - """Returns the first iterator""" + """Return the first iterator identifier.""" return self._children[2] @property def second_iterator(self) -> Optional[IdentifierRule]: - """Returns the second iterator or None if not present""" + """Return the second iterator identifier, or None if not present.""" return self._children[4] @property def iterable(self) -> ExpressionRule: - """Returns the collection expression being iterated over""" + """Return the collection expression being iterated over.""" return self._children[8] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> str: + """Serialize to 'for key, value in collection : ' string.""" result = "for " result += f"{self.first_iterator.serialize(options, context)}" @@ -101,7 +108,7 @@ def serialize( class ForCondRule(InlineCommentMixIn): """Rule for the optional condition in for expressions: 'if condition'""" - _children: Tuple[ + _children_layout: Tuple[ IF, Optional[NewLineOrCommentRule], ExpressionRule, # condition expression @@ -109,6 +116,7 @@ class ForCondRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "for_cond" def __init__(self, children, meta: Optional[Meta] = None): @@ -117,19 +125,20 @@ 
def __init__(self, children, meta: Optional[Meta] = None): @property def condition_expr(self) -> ExpressionRule: - """Returns the condition expression""" + """Return the condition expression.""" return self._children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> str: + """Serialize to 'if condition' string.""" return f"if {self.condition_expr.serialize(options, context)}" class ForTupleExprRule(ExpressionRule): """Rule for tuple/array for expressions: [for item in items : expression]""" - _children: Tuple[ + _children_layout: Tuple[ LSQB, Optional[NewLineOrCommentRule], ForIntroRule, @@ -143,13 +152,17 @@ class ForTupleExprRule(ExpressionRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "for_tuple_expr" def __init__(self, children, meta: Optional[Meta] = None): self._insert_optionals(children) super().__init__(children, meta) - def _insert_optionals(self, children: List, indexes: List[int] = None): + def _insert_optionals( # type: ignore[override] + self, children: List, indexes: Optional[List[int]] = None + ): + """Insert None placeholders, handling optional condition.""" condition = None for child in children: @@ -168,23 +181,23 @@ def _insert_optionals(self, children: List, indexes: List[int] = None): @property def for_intro(self) -> ForIntroRule: - """Returns the for intro rule""" + """Return the for intro rule.""" return self._children[2] @property def value_expr(self) -> ExpressionRule: - """Returns the value expression""" + """Return the value expression.""" return self._children[4] @property def condition(self) -> Optional[ForCondRule]: - """Returns the optional condition rule""" + """Return the optional condition rule.""" return self._children[6] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - + """Serialize to '[for ... 
: expr]' string.""" result = "[" with context.modify(inside_dollar_string=True): @@ -203,7 +216,7 @@ def serialize( class ForObjectExprRule(ExpressionRule): """Rule for object for expressions: {for key, value in items : key => value}""" - _children: Tuple[ + _children_layout: Tuple[ LBRACE, Optional[NewLineOrCommentRule], ForIntroRule, @@ -222,18 +235,26 @@ class ForObjectExprRule(ExpressionRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "for_object_expr" def __init__(self, children, meta: Optional[Meta] = None): self._insert_optionals(children) super().__init__(children, meta) - def _insert_optionals(self, children: List, indexes: List[int] = None): + def _insert_optionals( # type: ignore[override] + self, children: List, indexes: Optional[List[int]] = None + ): + """Insert None placeholders, handling optional ellipsis and condition.""" ellipsis_ = None condition = None for child in children: - if ellipsis_ is None and isinstance(child, ELLIPSIS): + if ( + ellipsis_ is None + and isinstance(child, StaticStringToken) + and child.lark_name() == "ELLIPSIS" + ): ellipsis_ = child if condition is None and isinstance(child, ForCondRule): condition = child @@ -252,33 +273,33 @@ def _insert_optionals(self, children: List, indexes: List[int] = None): @property def for_intro(self) -> ForIntroRule: - """Returns the for intro rule""" + """Return the for intro rule.""" return self._children[2] @property def key_expr(self) -> ExpressionRule: - """Returns the key expression""" + """Return the key expression.""" return self._children[4] @property def value_expr(self) -> ExpressionRule: - """Returns the value expression""" + """Return the value expression.""" return self._children[7] @property - def ellipsis(self) -> Optional[ELLIPSIS]: - """Returns the optional ellipsis token""" + def ellipsis(self): + """Return the optional ellipsis token.""" return self._children[9] @property def condition(self) -> Optional[ForCondRule]: - """Returns 
the optional condition rule""" + """Return the optional condition rule.""" return self._children[11] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - + """Serialize to '{for ... : key => value}' string.""" result = "{" with context.modify(inside_dollar_string=True): result += self.for_intro.serialize(options, context) diff --git a/hcl2/rules/functions.py b/hcl2/rules/functions.py index 49b20f65..e9722965 100644 --- a/hcl2/rules/functions.py +++ b/hcl2/rules/functions.py @@ -1,4 +1,5 @@ -from functools import lru_cache +"""Rule classes for HCL2 function calls and arguments.""" + from typing import Any, Optional, Tuple, Union, List from hcl2.rules.expressions import ExpressionRule @@ -16,8 +17,9 @@ class ArgumentsRule(InlineCommentMixIn): + """Rule for a comma-separated list of function arguments.""" - _children: Tuple[ + _children_layout: Tuple[ ExpressionRule, Tuple[ Optional[NewLineOrCommentRule], @@ -32,11 +34,12 @@ class ArgumentsRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "arguments" @property - @lru_cache(maxsize=None) def has_ellipsis(self) -> bool: + """Return whether the argument list ends with an ellipsis (...).""" for child in self._children[-2:]: if isinstance(child, StringToken) and child.lark_name() == "ELLIPSIS": return True @@ -44,13 +47,15 @@ def has_ellipsis(self) -> bool: @property def arguments(self) -> List[ExpressionRule]: + """Return the list of expression arguments.""" return [child for child in self._children if isinstance(child, ExpressionRule)] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a comma-separated argument string.""" result = ", ".join( - [str(argument.serialize(options, context)) for argument in self.arguments] + str(argument.serialize(options, context)) for argument in self.arguments ) if self.has_ellipsis: result += " ..." 
@@ -58,8 +63,9 @@ def serialize( class FunctionCallRule(InlineCommentMixIn): + """Rule for function call expressions (e.g. func(args)).""" - _children: Tuple[ + _children_layout: Tuple[ IdentifierRule, Optional[IdentifierRule], Optional[IdentifierRule], @@ -72,26 +78,34 @@ class FunctionCallRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "function_call" @property - @lru_cache(maxsize=None) def identifiers(self) -> List[IdentifierRule]: + """Return the function name identifier(s).""" return [child for child in self._children if isinstance(child, IdentifierRule)] @property - @lru_cache(maxsize=None) def arguments(self) -> Optional[ArgumentsRule]: + """Return the arguments rule, or None if no arguments.""" for child in self._children: if isinstance(child, ArgumentsRule): return child + return None def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to 'func(args)' string.""" with context.modify(inside_dollar_string=True): - result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" - result += f"({self.arguments.serialize(options, context) if self.arguments else ''})" + name = "::".join( + identifier.serialize(options, context) + for identifier in self.identifiers + ) + args = self.arguments + args_str = args.serialize(options, context) if args else "" + result = f"{name}({args_str})" if not context.inside_dollar_string: result = to_dollar_string(result) @@ -100,7 +114,9 @@ def serialize( class ProviderFunctionCallRule(FunctionCallRule): - _children: Tuple[ + """Rule for provider-namespaced function calls.""" + + _children_layout: Tuple[ IdentifierRule, IdentifierRule, IdentifierRule, @@ -113,4 +129,5 @@ class ProviderFunctionCallRule(FunctionCallRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "provider_function_call" diff --git a/hcl2/rules/indexing.py 
b/hcl2/rules/indexing.py index fc8cbf90..5cfefb96 100644 --- a/hcl2/rules/indexing.py +++ b/hcl2/rules/indexing.py @@ -1,3 +1,5 @@ +"""Rule classes for HCL2 indexing, attribute access, and splat expressions.""" + from typing import List, Optional, Tuple, Any, Union from lark.tree import Meta @@ -24,28 +26,34 @@ class ShortIndexRule(LarkRule): + """Rule for dot-numeric index access (e.g. .0).""" - _children: Tuple[ + _children_layout: Tuple[ DOT, IntLiteral, ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "short_index" @property def index(self): + """Return the index token.""" return self.children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '.N' string.""" return f".{self.index.serialize(options)}" class SqbIndexRule(InlineCommentMixIn): - _children: Tuple[ + """Rule for square-bracket index access (e.g. [expr]).""" + + _children_layout: Tuple[ LSQB, Optional[NewLineOrCommentRule], ExprTermRule, @@ -55,15 +63,18 @@ class SqbIndexRule(InlineCommentMixIn): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "braces_index" @property def index_expression(self): + """Return the index expression inside the brackets.""" return self.children[2] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '[expr]' string.""" return f"[{self.index_expression.serialize(options)}]" def __init__(self, children, meta: Optional[Meta] = None): @@ -72,118 +83,146 @@ def __init__(self, children, meta: Optional[Meta] = None): class IndexExprTermRule(ExpressionRule): + """Rule for index access on an expression term.""" - _children: Tuple[ExprTermRule, SqbIndexRule] + _children_layout: Tuple[ExprTermRule, SqbIndexRule] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "index_expr_term" def serialize( self, options=SerializationOptions(), context=SerializationContext() 
) -> Any: + """Serialize to 'expr[index]' string.""" with context.modify(inside_dollar_string=True): - result = f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + expr = self.children[0].serialize(options) + index = self.children[1].serialize(options) + result = f"{expr}{index}" if not context.inside_dollar_string: result = to_dollar_string(result) return result class GetAttrRule(LarkRule): + """Rule for dot-attribute access (e.g. .name).""" - _children: Tuple[ + _children_layout: Tuple[ DOT, IdentifierRule, ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "get_attr" @property def identifier(self) -> IdentifierRule: + """Return the accessed identifier.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '.identifier' string.""" return f".{self.identifier.serialize(options, context)}" class GetAttrExprTermRule(ExpressionRule): + """Rule for attribute access on an expression term.""" - _children: Tuple[ + _children_layout: Tuple[ ExprTermRule, GetAttrRule, ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "get_attr_expr_term" @property def expr_term(self) -> ExprTermRule: + """Return the base expression term.""" return self._children[0] @property def get_attr(self) -> GetAttrRule: + """Return the attribute access rule.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to 'expr.attr' string.""" with context.modify(inside_dollar_string=True): - result = f"{self.expr_term.serialize(options, context)}{self.get_attr.serialize(options, context)}" + expr = self.expr_term.serialize(options, context) + attr = self.get_attr.serialize(options, context) + result = f"{expr}{attr}" if not context.inside_dollar_string: result = to_dollar_string(result) return result class AttrSplatRule(LarkRule): - _children: 
Tuple[ + """Rule for attribute splat expressions (e.g. .*.attr).""" + + _children_layout: Tuple[ ATTR_SPLAT, Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "attr_splat" @property def get_attrs( self, - ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + ) -> List[Union[GetAttrRule, SqbIndexRule, ShortIndexRule]]: + """Return the trailing accessor chain.""" return self._children[1:] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '.*...' string.""" return ".*" + "".join( get_attr.serialize(options, context) for get_attr in self.get_attrs ) class AttrSplatExprTermRule(ExpressionRule): + """Rule for attribute splat on an expression term.""" - _children: Tuple[ExprTermRule, AttrSplatRule] + _children_layout: Tuple[ExprTermRule, AttrSplatRule] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "attr_splat_expr_term" @property def expr_term(self) -> ExprTermRule: + """Return the base expression term.""" return self._children[0] @property def attr_splat(self) -> AttrSplatRule: + """Return the attribute splat rule.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to 'expr.*...' string.""" with context.modify(inside_dollar_string=True): - result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + expr = self.expr_term.serialize(options, context) + splat = self.attr_splat.serialize(options, context) + result = f"{expr}{splat}" if not context.inside_dollar_string: result = to_dollar_string(result) @@ -191,49 +230,62 @@ def serialize( class FullSplatRule(LarkRule): - _children: Tuple[ + """Rule for full splat expressions (e.g. 
[*].attr).""" + + _children_layout: Tuple[ ATTR_SPLAT, Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "full_splat" @property def get_attrs( self, - ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + ) -> List[Union[GetAttrRule, SqbIndexRule, ShortIndexRule]]: + """Return the trailing accessor chain.""" return self._children[1:] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to '[*]...' string.""" return "[*]" + "".join( get_attr.serialize(options, context) for get_attr in self.get_attrs ) class FullSplatExprTermRule(ExpressionRule): - _children: Tuple[ExprTermRule, FullSplatRule] + """Rule for full splat on an expression term.""" + + _children_layout: Tuple[ExprTermRule, FullSplatRule] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "full_splat_expr_term" @property def expr_term(self) -> ExprTermRule: + """Return the base expression term.""" return self._children[0] @property def attr_splat(self) -> FullSplatRule: + """Return the full splat rule.""" return self._children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to 'expr[*]...' 
string.""" with context.modify(inside_dollar_string=True): - result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + expr = self.expr_term.serialize(options, context) + splat = self.attr_splat.serialize(options, context) + result = f"{expr}{splat}" if not context.inside_dollar_string: result = to_dollar_string(result) diff --git a/hcl2/rules/literal_rules.py b/hcl2/rules/literal_rules.py index cb8396b4..1db333f5 100644 --- a/hcl2/rules/literal_rules.py +++ b/hcl2/rules/literal_rules.py @@ -1,3 +1,5 @@ +"""Rule classes for literal values (keywords, identifiers, numbers, operators).""" + from abc import ABC from typing import Any, Tuple @@ -6,49 +8,69 @@ class TokenRule(LarkRule, ABC): + """Base rule wrapping a single token child.""" - _children: Tuple[LarkToken] + _children_layout: Tuple[LarkToken] @property def token(self) -> LarkToken: + """Return the single token child.""" return self._children[0] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize by delegating to the token's own serialization.""" return self.token.serialize() class KeywordRule(TokenRule): + """Rule for HCL2 keyword literals (true, false, null).""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "keyword" class IdentifierRule(TokenRule): + """Rule for HCL2 identifiers.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "identifier" class IntLitRule(TokenRule): + """Rule for integer literal expressions.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "int_lit" class FloatLitRule(TokenRule): + """Rule for floating-point literal expressions.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "float_lit" def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize, preserving scientific notation when 
configured.""" value = self.token.value # Scientific notation (e.g. 1.23e5) cannot survive a Python float() # round-trip, so preserve it as a ${...} expression string. - if options.preserve_scientific_notation and isinstance(value, str) and "e" in value.lower(): + if ( + options.preserve_scientific_notation + and isinstance(value, str) + and "e" in value.lower() + ): if context.inside_dollar_string: return value return to_dollar_string(value) @@ -56,6 +78,9 @@ def serialize( class BinaryOperatorRule(TokenRule): + """Rule for binary operator tokens.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "binary_operator" diff --git a/hcl2/rules/strings.py b/hcl2/rules/strings.py index b757f317..c56e6e79 100644 --- a/hcl2/rules/strings.py +++ b/hcl2/rules/strings.py @@ -1,3 +1,5 @@ +"""Rule classes for HCL2 string literals, interpolation, and heredoc templates.""" + import sys from typing import Tuple, List, Any, Union @@ -9,21 +11,22 @@ DBLQUOTE, STRING_CHARS, ESCAPED_INTERPOLATION, - HEREDOC_TEMPLATE, + HEREDOC_TEMPLATE, HEREDOC_TRIM_TEMPLATE, ) from hcl2.utils import ( SerializationOptions, SerializationContext, to_dollar_string, - HEREDOC_TRIM_PATTERN, + HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN, ) class InterpolationRule(LarkRule): + """Rule for ${expression} interpolation within strings.""" - _children: Tuple[ + _children_layout: Tuple[ INTERP_START, ExpressionRule, RBRACE, @@ -31,78 +34,97 @@ class InterpolationRule(LarkRule): @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "interpolation" @property def expression(self): + """Return the interpolated expression.""" return self.children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to ${expression} string.""" return to_dollar_string(self.expression.serialize(options)) class StringPartRule(LarkRule): - _children: Tuple[Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]] + 
"""Rule for a single part of a string (literal text, escape, or interpolation).""" + + _children_layout: Tuple[ + Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule] + ] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "string_part" @property def content(self) -> Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]: + """Return the content element (string chars, escape, or interpolation).""" return self._children[0] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize this string part.""" return self.content.serialize(options, context) class StringRule(LarkRule): + """Rule for quoted string literals.""" - _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] + _children_layout: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "string" @property def string_parts(self): + """Return the list of string parts between quotes.""" return self.children[1:-1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to a quoted string.""" return '"' + "".join(part.serialize() for part in self.string_parts) + '"' class HeredocTemplateRule(LarkRule): - - _children: Tuple[HEREDOC_TEMPLATE] + """Rule for heredoc template strings (< str: + """Return the grammar rule name.""" return "heredoc_template" - + @property def heredoc(self): + """Return the raw heredoc token.""" return self.children[0] - + def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize the heredoc, optionally stripping to a plain string.""" heredoc = self.heredoc.serialize(options, context) - + if not options.preserve_heredocs: match = HEREDOC_PATTERN.match(heredoc) if not match: raise RuntimeError(f"Invalid Heredoc token: {heredoc}") heredoc = match.group(2).rstrip(self._trim_chars) - heredoc = heredoc.replace('\\', 
'\\\\').replace('"', '\\"').replace('\n', '\\n') + heredoc = ( + heredoc.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") + ) return f'"{heredoc}"' result = heredoc.rstrip(self._trim_chars) @@ -110,16 +132,19 @@ def serialize( class HeredocTrimTemplateRule(HeredocTemplateRule): + """Rule for indented heredoc template strings (<<-MARKER).""" + + _children_layout: Tuple[HEREDOC_TRIM_TEMPLATE] - _children: Tuple[HEREDOC_TRIM_TEMPLATE] - @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "heredoc_trim_template" - + def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize the trim heredoc, stripping common leading whitespace.""" # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions # This is a special version of heredocs that are declared with "<<-" # This will calculate the minimum number of leading spaces in each line of a heredoc @@ -146,8 +171,7 @@ def serialize( lines = [line[min_spaces:] for line in lines] if not options.preserve_heredocs: - lines = [line.replace('\\', '\\\\').replace('"', '\\"') for line in lines] + lines = [line.replace("\\", "\\\\").replace('"', '\\"') for line in lines] sep = "\\n" if not options.preserve_heredocs else "\n" return '"' + sep.join(lines) + '"' - \ No newline at end of file diff --git a/hcl2/rules/tokens.py b/hcl2/rules/tokens.py index 06d1611f..e648bc14 100644 --- a/hcl2/rules/tokens.py +++ b/hcl2/rules/tokens.py @@ -1,5 +1,7 @@ +"""Token classes for terminal elements in the LarkElement tree.""" + from functools import lru_cache -from typing import Callable, Any, Type, Optional, Tuple +from typing import Callable, Any, Dict, Type, Optional, Tuple, Union from hcl2.rules.abstract import LarkToken @@ -24,28 +26,31 @@ def __build_subclass(cls, name: str) -> Type["StringToken"]: ) def __class_getitem__(cls, name: str) -> Type["StringToken"]: + """Return a cached subclass keyed by the given grammar 
token name.""" if not isinstance(name, str): raise TypeError("StringToken[...] expects a single str argument") return cls.__build_subclass(name) - def __init__(self, value: Optional[Any] = None): - super().__init__(value) + def __init__(self, value: Optional[Union[str, int, float]] = None): + super().__init__(value) # type: ignore[arg-type] @property def serialize_conversion(self) -> Callable[[Any], str]: + """Return str as the conversion callable.""" return str class StaticStringToken(StringToken): + """A StringToken subclass with a fixed default value set at class-creation time.""" - classes_by_value = {} + classes_by_value: Dict[Optional[str], Type["StringToken"]] = {} @classmethod @lru_cache(maxsize=None) def __build_subclass( - cls, name: str, default_value: str = None + cls, name: str, default_value: Optional[str] = None ) -> Type["StringToken"]: - """Create a subclass with a constant `lark_name`.""" + """Create a subclass with a constant `lark_name` and default value.""" result = type( # type: ignore f"{name}_TOKEN", @@ -59,65 +64,83 @@ def __build_subclass( cls.classes_by_value[default_value] = result return result - def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StringToken"]: - name, default_value = value - return cls.__build_subclass(name, default_value) + def __class_getitem__( # type: ignore[override] + cls, name: Tuple[str, str] + ) -> Type["StringToken"]: + """Return a cached subclass keyed by a (token_name, default_value) tuple.""" + token_name, default_value = name + return cls.__build_subclass(token_name, default_value) def __init__(self): super().__init__(getattr(self, "_default_value")) @property def serialize_conversion(self) -> Callable[[Any], str]: + """Return str as the conversion callable.""" return str -# explicitly define various kinds of string-based tokens for type hinting +# Explicitly define various kinds of string-based tokens for type hinting. 
+# mypy cannot follow the dynamic __class_getitem__ pattern, so every alias +# in this block carries a blanket ``type: ignore``. +# pylint: disable=invalid-name + # variable values -NAME = StringToken["NAME"] -STRING_CHARS = StringToken["STRING_CHARS"] -ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] -BINARY_OP = StringToken["BINARY_OP"] -HEREDOC_TEMPLATE = StringToken["HEREDOC_TEMPLATE"] -HEREDOC_TRIM_TEMPLATE = StringToken["HEREDOC_TRIM_TEMPLATE"] -NL_OR_COMMENT = StringToken["NL_OR_COMMENT"] +NAME = StringToken["NAME"] # type: ignore +STRING_CHARS = StringToken["STRING_CHARS"] # type: ignore +ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] # type: ignore +BINARY_OP = StringToken["BINARY_OP"] # type: ignore +HEREDOC_TEMPLATE = StringToken["HEREDOC_TEMPLATE"] # type: ignore +HEREDOC_TRIM_TEMPLATE = StringToken["HEREDOC_TRIM_TEMPLATE"] # type: ignore +NL_OR_COMMENT = StringToken["NL_OR_COMMENT"] # type: ignore # static values -EQ = StaticStringToken[("EQ", "=")] -COLON = StaticStringToken[("COLON", ":")] -LPAR = StaticStringToken[("LPAR", "(")] -RPAR = StaticStringToken[("RPAR", ")")] -LBRACE = StaticStringToken[("LBRACE", "{")] -RBRACE = StaticStringToken[("RBRACE", "}")] -DOT = StaticStringToken[("DOT", ".")] -COMMA = StaticStringToken[("COMMA", ",")] -ELLIPSIS = StaticStringToken[("ELLIPSIS", "...")] -QMARK = StaticStringToken[("QMARK", "?")] -LSQB = StaticStringToken[("LSQB", "[")] -RSQB = StaticStringToken[("RSQB", "]")] -INTERP_START = StaticStringToken[("INTERP_START", "${")] -DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] -ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] -FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] -FOR = StaticStringToken[("FOR", "for")] -IN = StaticStringToken[("IN", "in")] -IF = StaticStringToken[("IF", "if")] -FOR_OBJECT_ARROW = StaticStringToken[("FOR_OBJECT_ARROW", "=>")] +EQ = StaticStringToken[("EQ", "=")] # type: ignore +COLON = StaticStringToken[("COLON", ":")] # type: ignore +LPAR = 
StaticStringToken[("LPAR", "(")] # type: ignore +RPAR = StaticStringToken[("RPAR", ")")] # type: ignore +LBRACE = StaticStringToken[("LBRACE", "{")] # type: ignore +RBRACE = StaticStringToken[("RBRACE", "}")] # type: ignore +DOT = StaticStringToken[("DOT", ".")] # type: ignore +COMMA = StaticStringToken[("COMMA", ",")] # type: ignore +ELLIPSIS = StaticStringToken[("ELLIPSIS", "...")] # type: ignore +QMARK = StaticStringToken[("QMARK", "?")] # type: ignore +LSQB = StaticStringToken[("LSQB", "[")] # type: ignore +RSQB = StaticStringToken[("RSQB", "]")] # type: ignore +INTERP_START = StaticStringToken[("INTERP_START", "${")] # type: ignore +DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] # type: ignore +ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] # type: ignore +FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] # type: ignore +FOR = StaticStringToken[("FOR", "for")] # type: ignore +IN = StaticStringToken[("IN", "in")] # type: ignore +IF = StaticStringToken[("IF", "if")] # type: ignore +FOR_OBJECT_ARROW = StaticStringToken[("FOR_OBJECT_ARROW", "=>")] # type: ignore + +# pylint: enable=invalid-name class IntLiteral(LarkToken): + """Token for integer literal values.""" + @staticmethod def lark_name() -> str: + """Return the grammar token name.""" return "INT_LITERAL" @property def serialize_conversion(self) -> Callable: + """Return int as the conversion callable.""" return int class FloatLiteral(LarkToken): + """Token for floating-point literal values.""" + @staticmethod def lark_name() -> str: + """Return the grammar token name.""" return "FLOAT_LITERAL" @property def serialize_conversion(self) -> Callable: + """Return float as the conversion callable.""" return float diff --git a/hcl2/rules/whitespace.py b/hcl2/rules/whitespace.py index 5f2fa886..540845d7 100644 --- a/hcl2/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -1,28 +1,37 @@ +"""Rule classes for whitespace, comments, and inline comment handling.""" + from abc import ABC -from typing import 
Optional, List, Any, Tuple +from typing import Optional, List, Any -from hcl2.rules.abstract import LarkToken, LarkRule +from hcl2.rules.abstract import LarkRule from hcl2.rules.literal_rules import TokenRule +from hcl2.rules.tokens import NL_OR_COMMENT from hcl2.utils import SerializationOptions, SerializationContext class NewLineOrCommentRule(TokenRule): + """Rule for newline and comment tokens.""" + @staticmethod def lark_name() -> str: + """Return the grammar rule name.""" return "new_line_or_comment" @classmethod def from_string(cls, string: str) -> "NewLineOrCommentRule": - return cls([LarkToken("NL_OR_COMMENT", string)]) + """Create an instance from a raw comment or newline string.""" + return cls([NL_OR_COMMENT(string)]) # type: ignore[abstract] # pylint: disable=abstract-class-instantiated def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + """Serialize to the raw comment/newline string.""" return self.token.serialize() def to_list( self, options: SerializationOptions = SerializationOptions() ) -> Optional[List[str]]: + """Extract comment text strings, or None if only a newline.""" comment = self.serialize(options) if comment == "\n": return None @@ -48,8 +57,11 @@ def to_list( class InlineCommentMixIn(LarkRule, ABC): - def _insert_optionals(self, children: List, indexes: List[int] = None): - for index in indexes: + """Mixin for rules that may contain inline comments among their children.""" + + def _insert_optionals(self, children: List, indexes: Optional[List[int]] = None): + """Insert None placeholders at expected optional-child positions.""" + for index in indexes: # type: ignore[union-attr] try: child = children[index] except IndexError: @@ -59,6 +71,7 @@ def _insert_optionals(self, children: List, indexes: List[int] = None): children.insert(index, None) def inline_comments(self): + """Collect all inline comment strings from this rule's children.""" result = [] for child in self._children: From 
72078f07e9cdd61e6cf628d850874926761eb058 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 20:42:05 +0100 Subject: [PATCH 33/42] fix pre-commit errors --- .coveragerc | 2 +- .github/ISSUE_TEMPLATE/hcl2-parsing-error.md | 14 +- .pre-commit-config.yaml | 1 + cli/hcl_to_json.py | 27 +- cli/helpers.py | 6 +- cli/json_to_hcl.py | 26 +- hcl2/api.py | 35 +- hcl2/deserializer.py | 51 +-- hcl2/formatter.py | 46 ++- hcl2/reconstructor.py | 35 +- hcl2/rules/abstract.py | 8 +- hcl2/rules/tokens.py | 12 +- hcl2/rules/whitespace.py | 4 +- hcl2/transformer.py | 12 +- hcl2/utils.py | 11 + pylintrc | 2 +- test/integration/hcl2_original/smoke.tf | 2 +- test/integration/test_round_trip.py | 15 +- test/integration/test_specialized.py | 14 +- test/unit/cli/test_hcl_to_json.py | 16 +- test/unit/cli/test_helpers.py | 47 ++- test/unit/cli/test_json_to_hcl.py | 16 +- test/unit/rules/test_abstract.py | 3 +- test/unit/rules/test_base.py | 11 +- test/unit/rules/test_containers.py | 1 + test/unit/rules/test_expressions.py | 8 +- test/unit/rules/test_for_expressions.py | 337 +++++++++++-------- test/unit/rules/test_functions.py | 1 + test/unit/rules/test_literal_rules.py | 1 + test/unit/rules/test_strings.py | 9 +- test/unit/rules/test_tokens.py | 3 +- test/unit/rules/test_whitespace.py | 9 +- test/unit/test_api.py | 51 ++- test/unit/test_builder.py | 6 +- test/unit/test_deserializer.py | 16 +- test/unit/test_formatter.py | 95 +++--- test/unit/test_utils.py | 1 + 37 files changed, 561 insertions(+), 393 deletions(-) diff --git a/.coveragerc b/.coveragerc index 558bc244..89ef860b 100644 --- a/.coveragerc +++ b/.coveragerc @@ -3,7 +3,7 @@ branch = true omit = hcl2/lark_parser.py hcl2/version.py - hcl2/__main__.py + hcl2/__main__.py hcl2/__init__.py hcl2/rules/__init__.py cli/__init__.py diff --git a/.github/ISSUE_TEMPLATE/hcl2-parsing-error.md b/.github/ISSUE_TEMPLATE/hcl2-parsing-error.md index 4837d3ff..1b526e9a 100644 --- a/.github/ISSUE_TEMPLATE/hcl2-parsing-error.md +++ 
b/.github/ISSUE_TEMPLATE/hcl2-parsing-error.md @@ -1,27 +1,31 @@ ---- +______________________________________________________________________ + name: HCL2 parsing error about: Template for reporting a bug related to parsing HCL2 code title: '' labels: bug assignees: kkozik-amplify ---- +______________________________________________________________________ **Describe the bug** A clear and concise description of what the bug is. **Software:** - - OS: [macOS / Windows / Linux] - - Python version (e.g. 3.9.21) - - python-hcl2 version (e.g. 7.0.0) + +- OS: \[macOS / Windows / Linux\] +- Python version (e.g. 3.9.21) +- python-hcl2 version (e.g. 7.0.0) **Snippet of HCL2 code causing the unexpected behaviour:** + ```terraform locals { foo = "bar" } ``` + **Expected behavior** A clear and concise description of what you expected to happen, e.g. python dictionary or JSON you expected to receive as a result of parsing. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 11b63555..ef43294d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,6 +6,7 @@ repos: rev: v4.3.0 hooks: - id: trailing-whitespace + exclude: ^test/integration/(hcl2_reconstructed|specialized)/ - id: end-of-file-fixer - id: check-added-large-files - id: no-commit-to-branch # Prevent commits directly to master diff --git a/cli/hcl_to_json.py b/cli/hcl_to_json.py index faa9fb33..d4acf0e5 100644 --- a/cli/hcl_to_json.py +++ b/cli/hcl_to_json.py @@ -2,16 +2,24 @@ import argparse import json import os -from typing import IO +from typing import IO, Optional, TextIO from hcl2 import load from hcl2.utils import SerializationOptions from hcl2.version import __version__ -from .helpers import HCL_SKIPPABLE, _convert_single_file, _convert_directory, _convert_stdin +from .helpers import ( + HCL_SKIPPABLE, + _convert_single_file, + _convert_directory, + _convert_stdin, +) def _hcl_to_json( - in_file: IO, out_file: IO, options: SerializationOptions, json_indent: int = None, + in_file: 
TextIO, + out_file: IO, + options: SerializationOptions, + json_indent: Optional[int] = None, ) -> None: data = load(in_file, serialization_options=options) json.dump(data, out_file, indent=json_indent) @@ -108,11 +116,18 @@ def convert(in_file, out_file): if args.PATH == "-": _convert_stdin(convert) elif os.path.isfile(args.PATH): - _convert_single_file(args.PATH, args.OUT_PATH, convert, args.skip, HCL_SKIPPABLE) + _convert_single_file( + args.PATH, args.OUT_PATH, convert, args.skip, HCL_SKIPPABLE + ) elif os.path.isdir(args.PATH): _convert_directory( - args.PATH, args.OUT_PATH, convert, args.skip, HCL_SKIPPABLE, - in_extensions={".tf", ".hcl"}, out_extension=".json", + args.PATH, + args.OUT_PATH, + convert, + args.skip, + HCL_SKIPPABLE, + in_extensions={".tf", ".hcl"}, + out_extension=".json", ) else: raise RuntimeError("Invalid Path", args.PATH) diff --git a/cli/helpers.py b/cli/helpers.py index bef6ba6f..6d463d45 100644 --- a/cli/helpers.py +++ b/cli/helpers.py @@ -2,7 +2,7 @@ import json import os import sys -from typing import Callable, IO, Set, Tuple +from typing import Callable, IO, Set, Tuple, Type from lark import UnexpectedCharacters, UnexpectedToken @@ -16,7 +16,7 @@ def _convert_single_file( out_path: str, convert_fn: Callable[[IO, IO], None], skip: bool, - skippable: Tuple[type, ...], + skippable: Tuple[Type[BaseException], ...], ) -> None: with open(in_path, "r", encoding="utf-8") as in_file: print(in_path, file=sys.stderr, flush=True) @@ -43,7 +43,7 @@ def _convert_directory( out_path: str, convert_fn: Callable[[IO, IO], None], skip: bool, - skippable: Tuple[type, ...], + skippable: Tuple[Type[BaseException], ...], in_extensions: Set[str], out_extension: str, ) -> None: diff --git a/cli/json_to_hcl.py b/cli/json_to_hcl.py index 48ade7c0..65caeb09 100644 --- a/cli/json_to_hcl.py +++ b/cli/json_to_hcl.py @@ -2,18 +2,23 @@ import argparse import json import os -from typing import IO +from typing import TextIO from hcl2 import dump from 
hcl2.deserializer import DeserializerOptions from hcl2.formatter import FormatterOptions from hcl2.version import __version__ -from .helpers import JSON_SKIPPABLE, _convert_single_file, _convert_directory, _convert_stdin +from .helpers import ( + JSON_SKIPPABLE, + _convert_single_file, + _convert_directory, + _convert_stdin, +) def _json_to_hcl( - in_file: IO, - out_file: IO, + in_file: TextIO, + out_file: TextIO, d_opts: DeserializerOptions, f_opts: FormatterOptions, ) -> None: @@ -114,11 +119,18 @@ def convert(in_file, out_file): if args.PATH == "-": _convert_stdin(convert) elif os.path.isfile(args.PATH): - _convert_single_file(args.PATH, args.OUT_PATH, convert, args.skip, JSON_SKIPPABLE) + _convert_single_file( + args.PATH, args.OUT_PATH, convert, args.skip, JSON_SKIPPABLE + ) elif os.path.isdir(args.PATH): _convert_directory( - args.PATH, args.OUT_PATH, convert, args.skip, JSON_SKIPPABLE, - in_extensions={".json"}, out_extension=".tf", + args.PATH, + args.OUT_PATH, + convert, + args.skip, + JSON_SKIPPABLE, + in_extensions={".json"}, + out_extension=".tf", ) else: raise RuntimeError("Invalid Path", args.PATH) diff --git a/hcl2/api.py b/hcl2/api.py index 0238f418..db4caa72 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -64,7 +64,13 @@ def dump( :param deserializer_options: Options controlling deserialization behavior. :param formatter_options: Options controlling formatting behavior. """ - file.write(dumps(data, deserializer_options=deserializer_options, formatter_options=formatter_options)) + file.write( + dumps( + data, + deserializer_options=deserializer_options, + formatter_options=formatter_options, + ) + ) def dumps( @@ -79,7 +85,11 @@ def dumps( :param deserializer_options: Options controlling deserialization behavior. :param formatter_options: Options controlling formatting behavior. 
""" - tree = from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options) + tree = from_dict( + data, + deserializer_options=deserializer_options, + formatter_options=formatter_options, + ) return reconstruct(tree) @@ -135,18 +145,18 @@ def from_dict( *, deserializer_options: Optional[DeserializerOptions] = None, formatter_options: Optional[FormatterOptions] = None, - format: bool = True, + apply_format: bool = True, ) -> StartRule: """Convert a Python dict into a LarkElement tree. :param data: Python dict (as produced by :func:`load`). :param deserializer_options: Options controlling deserialization behavior. :param formatter_options: Options controlling formatting behavior. - :param format: If True (default), apply formatting to the tree. + :param apply_format: If True (default), apply formatting to the tree. """ deserializer = BaseDeserializer(deserializer_options) tree = deserializer.load_python(data) - if format: + if apply_format: formatter = BaseFormatter(formatter_options) formatter.format_tree(tree) return tree @@ -157,17 +167,22 @@ def from_json( *, deserializer_options: Optional[DeserializerOptions] = None, formatter_options: Optional[FormatterOptions] = None, - format: bool = True, + apply_format: bool = True, ) -> StartRule: """Convert a JSON string into a LarkElement tree. :param text: JSON string. :param deserializer_options: Options controlling deserialization behavior. :param formatter_options: Options controlling formatting behavior. - :param format: If True (default), apply formatting to the tree. + :param apply_format: If True (default), apply formatting to the tree. 
""" data = _json.loads(text) - return from_dict(data, deserializer_options=deserializer_options, formatter_options=formatter_options, format=format) + return from_dict( + data, + deserializer_options=deserializer_options, + formatter_options=formatter_options, + apply_format=apply_format, + ) def reconstruct(tree) -> str: @@ -187,7 +202,9 @@ def transform(lark_tree: Tree, *, discard_comments: bool = False) -> StartRule: :param lark_tree: Raw Lark tree from :func:`parse_to_tree` or :func:`parse_string_to_tree`. :param discard_comments: If True, discard comments during transformation. """ - return RuleTransformer(discard_new_line_or_comments=discard_comments).transform(lark_tree) + return RuleTransformer(discard_new_line_or_comments=discard_comments).transform( + lark_tree + ) def serialize( diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 328427db..03dac5d8 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -1,9 +1,10 @@ +"""Deserialize Python dicts (or JSON) into LarkElement trees.""" import json import re from abc import ABC, abstractmethod from dataclasses import dataclass from functools import cached_property -from typing import Any, TextIO, List, Union +from typing import Any, TextIO, List, Optional, Union from regex import regex @@ -62,34 +63,40 @@ @dataclass class DeserializerOptions: + """Options controlling how Python dicts are deserialized into LarkElement trees.""" + heredocs_to_strings: bool = False strings_to_heredocs: bool = False object_elements_colon: bool = False object_elements_trailing_comma: bool = True - # with_comments: bool = False # TODO + # with_comments: bool = False # TODO class LarkElementTreeDeserializer(ABC): - def __init__(self, options: DeserializerOptions = None): + """Abstract base for deserializers that produce LarkElement trees.""" + + def __init__(self, options: Optional[DeserializerOptions] = None): self.options = options or DeserializerOptions() @abstractmethod def loads(self, value: str) -> 
LarkElement: + """Deserialize a JSON string into a LarkElement tree.""" raise NotImplementedError() def load(self, file: TextIO) -> LarkElement: + """Deserialize a JSON file into a LarkElement tree.""" return self.loads(file.read()) class BaseDeserializer(LarkElementTreeDeserializer): - def __init__(self, options=None): - super().__init__(options) + """Default deserializer: Python dict/JSON → LarkElement tree.""" @cached_property def _transformer(self) -> RuleTransformer: return RuleTransformer() - def load_python(self, value: Any) -> LarkElement: + def load_python(self, value: Any) -> StartRule: + """Deserialize a Python object into a StartRule tree.""" if isinstance(value, dict): # Top-level dict is always a body (attributes + blocks), not an object children = self._deserialize_block_elements(value) @@ -99,13 +106,14 @@ def load_python(self, value: Any) -> LarkElement: return result def loads(self, value: str) -> LarkElement: + """Deserialize a JSON string into a LarkElement tree.""" return self.load_python(json.loads(value)) def _deserialize(self, value: Any) -> LarkElement: if isinstance(value, dict): if self._contains_block_marker(value): - children = [] + children: List[Any] = [] block_elements = self._deserialize_block_elements(value) for element in block_elements: @@ -120,8 +128,8 @@ def _deserialize(self, value: Any) -> LarkElement: return self._deserialize_text(value) - def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: - children = [] + def _deserialize_block_elements(self, value: dict) -> List[LarkElement]: + children: List[LarkElement] = [] for key, val in value.items(): if self._is_block(val): # this value is a list of blocks, iterate over each block and deserialize them @@ -135,6 +143,7 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: return children + # pylint: disable=R0911 def _deserialize_text(self, value: Any) -> LarkRule: # bool must be checked before int since bool is a subclass of int if 
isinstance(value, bool): @@ -160,7 +169,7 @@ def _deserialize_text(self, value: Any) -> LarkRule: if self.options.strings_to_heredocs: inner = value[1:-1] - if '\\n' in inner: + if "\\n" in inner: return self._deserialize_string_as_heredoc(inner) return self._deserialize_string(value) @@ -192,8 +201,8 @@ def _deserialize_string(self, value: str) -> StringRule: if part.endswith('"'): part = part[:-1] - e = self._deserialize_string_part(part) - result.append(e) + string_part = self._deserialize_string_part(part) + result.append(string_part) return StringRule([DBLQUOTE(), *result, DBLQUOTE()]) @@ -224,7 +233,7 @@ def _deserialize_string_as_heredoc(self, inner: str) -> HeredocTemplateRule: # Single-pass unescape: \\n → \n, \\" → ", \\\\ → \ content = re.sub( r'\\(n|"|\\)', - lambda m: '\n' if m.group(1) == 'n' else m.group(1), + lambda m: "\n" if m.group(1) == "n" else m.group(1), inner, ) heredoc = f"< AttributeRule: return AttributeRule(children) def _deserialize_list(self, value: List) -> TupleRule: - children = [] + children: List[Any] = [] for element in value: deserialized = self._deserialize(element) if not isinstance(deserialized, ExprTermRule): @@ -300,7 +309,7 @@ def _deserialize_list(self, value: List) -> TupleRule: return TupleRule([LSQB(), *children, RSQB()]) def _deserialize_object(self, value: dict) -> ObjectRule: - children = [] + children: List[Any] = [] for key, val in value.items(): children.append(self._deserialize_object_elem(key, val)) @@ -320,11 +329,11 @@ def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: ) elif isinstance(key, str) and "." 
in key: parts = key.split(".") - children = [] + dot_children: List[Any] = [] for part in parts: - children.append(self._deserialize_identifier(part)) - children.append(DOT()) - key = ObjectElemKeyDotAccessor(children[:-1]) # without the last comma + dot_children.append(self._deserialize_identifier(part)) + dot_children.append(DOT()) + key = ObjectElemKeyDotAccessor(dot_children[:-1]) # without the last dot else: key = self._deserialize_text(key) @@ -364,6 +373,8 @@ def _contains_block_marker(self, obj: dict) -> bool: return True if isinstance(value, list): for element in value: - if isinstance(element, dict) and self._contains_block_marker(element): + if isinstance(element, dict) and self._contains_block_marker( + element + ): return True return False diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 23302187..8b691c44 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -1,6 +1,7 @@ +"""Format LarkElement trees with indentation, alignment, and spacing.""" from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import List +from typing import List, Optional from hcl2.rules.abstract import LarkElement from hcl2.rules.base import ( @@ -10,7 +11,7 @@ BodyRule, ) from hcl2.rules.containers import ObjectRule, ObjectElemRule, TupleRule -from hcl2.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rules.expressions import ExprTermRule from hcl2.rules.for_expressions import ( ForTupleExprRule, ForObjectExprRule, @@ -23,6 +24,8 @@ @dataclass class FormatterOptions: + """Options controlling whitespace formatting of LarkElement trees.""" + indent_length: int = 2 open_empty_blocks: bool = True open_empty_objects: bool = True @@ -33,27 +36,35 @@ class FormatterOptions: class LarkElementTreeFormatter(ABC): - def __init__(self, options: FormatterOptions = None): + """Abstract base for formatters that operate on LarkElement trees.""" + + def __init__(self, options: Optional[FormatterOptions] = None): self.options = options or 
FormatterOptions() @abstractmethod def format_tree(self, tree: LarkElement): + """Apply formatting to the given LarkElement tree in place.""" raise NotImplementedError() class BaseFormatter(LarkElementTreeFormatter): - def __init__(self, options: FormatterOptions = None): + """Default formatter: adds indentation, newlines, and vertical alignment.""" + + def __init__(self, options: Optional[FormatterOptions] = None): super().__init__(options) - self._last_new_line: NewLineOrCommentRule = None + self._last_new_line: Optional[NewLineOrCommentRule] = None def format_tree(self, tree: LarkElement): + """Apply formatting to the given LarkElement tree in place.""" if isinstance(tree, StartRule): self.format_start_rule(tree) def format_start_rule(self, rule: StartRule): + """Format the top-level start rule.""" self.format_body_rule(rule.body, 0) def format_block_rule(self, rule: BlockRule, indent_level: int = 0): + """Format a block rule with its body and closing brace.""" if self.options.vertically_align_attributes: self._vertically_align_attributes_in_body(rule.body) @@ -64,7 +75,7 @@ def format_block_rule(self, rule: BlockRule, indent_level: int = 0): rule.children.insert(-1, self._build_newline(indent_level - 1, 2)) def format_body_rule(self, rule: BodyRule, indent_level: int = 0): - + """Format a body rule, adding newlines between attributes and blocks.""" in_start = isinstance(rule.parent, StartRule) new_children = [] @@ -90,9 +101,11 @@ def format_body_rule(self, rule: BodyRule, indent_level: int = 0): rule._children = new_children def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): + """Format an attribute rule by formatting its value expression.""" self.format_expression(rule.expression, indent_level + 1) def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): + """Format a tuple rule with one element per line.""" if len(rule.elements) == 0: if self.options.open_empty_tuples: rule.children.insert(1, 
self._build_newline(indent_level - 1, 2)) @@ -104,31 +117,31 @@ def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): if isinstance(child, ExprTermRule): self.format_expression(child, indent_level + 1) - if isinstance(child, (COMMA, LSQB)): + if isinstance(child, (COMMA, LSQB)): # type: ignore[misc] new_children.append(self._build_newline(indent_level)) self._deindent_last_line() rule._children = new_children def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): + """Format an object rule with one element per line and optional alignment.""" if len(rule.elements) == 0: if self.options.open_empty_objects: rule.children.insert(1, self._build_newline(indent_level - 1, 2)) return new_children = [] - for i in range(len(rule.children)): - child = rule.children[i] + for i, child in enumerate(rule.children): next_child = rule.children[i + 1] if i + 1 < len(rule.children) else None new_children.append(child) - if isinstance(child, LBRACE): + if isinstance(child, LBRACE): # type: ignore[misc] new_children.append(self._build_newline(indent_level)) if ( next_child and isinstance(next_child, ObjectElemRule) - and isinstance(child, (ObjectElemRule, COMMA)) + and isinstance(child, (ObjectElemRule, COMMA)) # type: ignore[misc] ): new_children.append(self._build_newline(indent_level)) @@ -144,6 +157,7 @@ def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): self._vertically_align_object_elems(rule) def format_expression(self, rule: ExprTermRule, indent_level: int = 0): + """Dispatch formatting for the inner expression of an ExprTermRule.""" if isinstance(rule.expression, ObjectRule): self.format_object_rule(rule.expression, indent_level) @@ -160,6 +174,7 @@ def format_expression(self, rule: ExprTermRule, indent_level: int = 0): self.format_expression(rule.expression, indent_level) def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = 0): + """Format a for-tuple expression with newlines around clauses.""" for 
child in expression.children: if isinstance(child, ExprTermRule): self.format_expression(child, indent_level + 1) @@ -182,6 +197,7 @@ def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = def format_forobjectexpr( self, expression: ForObjectExprRule, indent_level: int = 0 ): + """Format a for-object expression with newlines around clauses.""" for child in expression.children: if isinstance(child, ExprTermRule): self.format_expression(child, indent_level + 1) @@ -220,8 +236,7 @@ def _vertically_align_attributes_in_body(self, body: BodyRule): def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): max_length = max( - len(attribute.identifier.token.value) - for attribute in attributes_sequence + len(attribute.identifier.token.value) for attribute in attributes_sequence ) for attribute in attributes_sequence: name_length = len(attribute.identifier.token.value) @@ -238,7 +253,7 @@ def _vertically_align_object_elems(self, rule: ObjectRule): spaces_to_add = max_length - key_length separator = elem.children[1] - if isinstance(separator, COLON): + if isinstance(separator, COLON): # type: ignore[misc] spaces_to_add += 1 elem.children[1].set_value(" " * spaces_to_add + separator.value) @@ -257,7 +272,8 @@ def _build_newline( return result def _deindent_last_line(self, times: int = 1): + assert self._last_new_line is not None token = self._last_new_line.token - for i in range(times): + for _ in range(times): if token.value.endswith(" " * self.options.indent_length): token.set_value(token.value[: -self.options.indent_length]) diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index abfc21f6..b9f3b3ce 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -1,4 +1,5 @@ -from typing import List, Union +"""Reconstruct HCL2 text from a Lark Tree AST.""" +from typing import List, Optional, Union from lark import Tree, Token from hcl2.rules import tokens @@ -33,10 +34,16 @@ class HCLReconstructor: } def 
__init__(self): - self._reset_state() + self._last_was_space = True + self._current_indent = 0 + self._last_token_name: Optional[str] = None + self._last_rule_name: Optional[str] = None + self._in_parentheses = False + self._in_object = False + self._in_tuple = False def _reset_state(self): - """State tracking for formatting decisions""" + """Reset state tracking for formatting decisions.""" self._last_was_space = True self._current_indent = 0 self._last_token_name = None @@ -45,8 +52,9 @@ def _reset_state(self): self._in_object = False self._in_tuple = False + # pylint:disable=R0911,R0912 def _should_add_space_before( - self, current_node: Union[Tree, Token], parent_rule_name: str = None + self, current_node: Union[Tree, Token], parent_rule_name: Optional[str] = None ) -> bool: """Determine if we should add a space before the current token/rule.""" @@ -151,7 +159,9 @@ def _should_add_space_before( return False - def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: + def _reconstruct_tree( + self, tree: Tree, parent_rule_name: Optional[str] = None + ) -> List[str]: """Recursively reconstruct a Tree node into HCL text fragments.""" result = [] rule_name = tree.data @@ -197,7 +207,9 @@ def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[st return result - def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: + def _reconstruct_token( + self, token: Token, parent_rule_name: Optional[str] = None + ) -> str: """Reconstruct a Token node into HCL text fragments.""" result = str(token.value) if self._should_add_space_before(token, parent_rule_name): @@ -210,18 +222,17 @@ def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: return result def _reconstruct_node( - self, node: Union[Tree, Token], parent_rule_name: str = None + self, node: Union[Tree, Token], parent_rule_name: Optional[str] = None ) -> List[str]: """Reconstruct any node (Tree or Token) into HCL text 
fragments.""" if isinstance(node, Tree): return self._reconstruct_tree(node, parent_rule_name) - elif isinstance(node, Token): + if isinstance(node, Token): return [self._reconstruct_token(node, parent_rule_name)] - else: - # Fallback: convert to string - return [str(node)] + # Fallback: convert to string + return [str(node)] - def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: + def reconstruct(self, tree: Tree, postproc=None) -> str: """Convert a Lark.Tree AST back into a string representation of HCL.""" # Reset state self._reset_state() diff --git a/hcl2/rules/abstract.py b/hcl2/rules/abstract.py index 26cda89c..554bc44d 100644 --- a/hcl2/rules/abstract.py +++ b/hcl2/rules/abstract.py @@ -46,7 +46,7 @@ def serialize( class LarkToken(LarkElement, ABC): """Base class for terminal token elements (leaves of the tree).""" - def __init__(self, value: Union[str, int, float]): + def __init__(self, value: Optional[Union[str, int, float]] = None): self._value = value super().__init__() @@ -100,7 +100,7 @@ def serialize( raise NotImplementedError() @property - def children(self) -> List[LarkElement]: + def children(self) -> List[Any]: """Return the list of child elements.""" return self._children @@ -125,9 +125,9 @@ def to_lark(self) -> Tree: return Tree(self.lark_name(), result_children, meta=self._meta) - def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): + def __init__(self, children: List[Any], meta: Optional[Meta] = None): super().__init__() - self._children = children + self._children: List[Any] = children self._meta = meta or Meta() for index, child in enumerate(children): diff --git a/hcl2/rules/tokens.py b/hcl2/rules/tokens.py index e648bc14..dab5ac4f 100644 --- a/hcl2/rules/tokens.py +++ b/hcl2/rules/tokens.py @@ -1,7 +1,7 @@ """Token classes for terminal elements in the LarkElement tree.""" from functools import lru_cache -from typing import Callable, Any, Dict, Type, Optional, Tuple, Union +from typing 
import Callable, Any, Dict, Type, Optional, Tuple from hcl2.rules.abstract import LarkToken @@ -12,6 +12,13 @@ class StringToken(LarkToken): cached subclass whose static `lark_name()` yields the given string. """ + @staticmethod + def lark_name() -> str: + """Overridden by dynamic subclasses created via ``__class_getitem__``.""" + raise NotImplementedError( + "Use StringToken['NAME'] to create a concrete subclass" + ) + @classmethod @lru_cache(maxsize=None) def __build_subclass(cls, name: str) -> Type["StringToken"]: @@ -31,9 +38,6 @@ def __class_getitem__(cls, name: str) -> Type["StringToken"]: raise TypeError("StringToken[...] expects a single str argument") return cls.__build_subclass(name) - def __init__(self, value: Optional[Union[str, int, float]] = None): - super().__init__(value) # type: ignore[arg-type] - @property def serialize_conversion(self) -> Callable[[Any], str]: """Return str as the conversion callable.""" diff --git a/hcl2/rules/whitespace.py b/hcl2/rules/whitespace.py index 540845d7..8591fd20 100644 --- a/hcl2/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -61,7 +61,9 @@ class InlineCommentMixIn(LarkRule, ABC): def _insert_optionals(self, children: List, indexes: Optional[List[int]] = None): """Insert None placeholders at expected optional-child positions.""" - for index in indexes: # type: ignore[union-attr] + if indexes is None: + return + for index in indexes: try: child = children[index] except IndexError: diff --git a/hcl2/transformer.py b/hcl2/transformer.py index 7de4f7e1..aebf9d1f 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -1,3 +1,4 @@ +"""Transform Lark parse trees into typed LarkElement rule trees.""" # pylint: disable=missing-function-docstring,unused-argument from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta @@ -81,16 +82,19 @@ def __init__(self, discard_new_line_or_comments: bool = False): def __default_token__(self, token: Token) -> StringToken: # TODO make this return 
StaticStringToken where applicable - if token.value in StaticStringToken.classes_by_value.keys(): + if token.value in StaticStringToken.classes_by_value: return StaticStringToken.classes_by_value[token.value]() - return StringToken[token.type](token.value) + return StringToken[token.type](token.value) # type: ignore[misc] + # pylint: disable=C0103 def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: return FloatLiteral(token.value) + # pylint: disable=C0103 def NAME(self, token: Token) -> NAME: return NAME(token.value) + # pylint: disable=C0103 def INT_LITERAL(self, token: Token) -> IntLiteral: return IntLiteral(token.value) @@ -114,7 +118,9 @@ def attribute(self, meta: Meta, args) -> AttributeRule: return AttributeRule(args, meta) @v_args(meta=True) - def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + def new_line_or_comment( + self, meta: Meta, args + ): # -> NewLineOrCommentRule | Discard if self.discard_new_line_or_comments: return Discard return NewLineOrCommentRule(args, meta) diff --git a/hcl2/utils.py b/hcl2/utils.py index b15dda8a..4eb31470 100644 --- a/hcl2/utils.py +++ b/hcl2/utils.py @@ -1,3 +1,4 @@ +"""Serialization options, context tracking, and string utility helpers.""" import re from contextlib import contextmanager from dataclasses import dataclass, replace @@ -9,6 +10,8 @@ @dataclass class SerializationOptions: + """Options controlling how LarkElement trees are serialized to Python dicts.""" + with_comments: bool = True with_meta: bool = False wrap_objects: bool = False @@ -21,10 +24,13 @@ class SerializationOptions: @dataclass class SerializationContext: + """Mutable state tracked during serialization traversal.""" + inside_dollar_string: bool = False inside_parentheses: bool = False def replace(self, **kwargs) -> "SerializationContext": + """Return a new context with the given fields overridden.""" return replace(self, **kwargs) @contextmanager @@ -35,6 +41,7 @@ def copy(self, **kwargs) -> 
Generator["SerializationContext", None, None]: @contextmanager def modify(self, **kwargs): + """Context manager that temporarily mutates fields, restoring on exit.""" original_values = {key: getattr(self, key) for key in kwargs} for key, value in kwargs.items(): @@ -49,24 +56,28 @@ def modify(self, **kwargs): def is_dollar_string(value: str) -> bool: + """Return True if value is a ${...} interpolation wrapper.""" if not isinstance(value, str): return False return value.startswith("${") and value.endswith("}") def to_dollar_string(value: str) -> str: + """Wrap value in ${...} if not already wrapped.""" if not is_dollar_string(value): return f"${{{value}}}" return value def unwrap_dollar_string(value: str) -> str: + """Strip the ${...} wrapper from value if present.""" if is_dollar_string(value): return value[2:-1] return value def wrap_into_parentheses(value: str) -> str: + """Wrap value in parentheses, preserving ${...} wrappers.""" if is_dollar_string(value): value = unwrap_dollar_string(value) return to_dollar_string(f"({value})") diff --git a/pylintrc b/pylintrc index edd28005..34599008 100644 --- a/pylintrc +++ b/pylintrc @@ -9,7 +9,7 @@ # Add to the black list. It should be a base name, not a # path. You may set this option multiple times. -ignore=CVS +ignore=CVS,version.py # Pickle collected data for later comparisons. persistent=yes diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index e2a0843b..99537532 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -37,7 +37,7 @@ block label1 label2 { k = a.b.5 l = a.*.b m = a[*][c].a.*.1 - + block b1 { a = 1 } diff --git a/test/integration/test_round_trip.py b/test/integration/test_round_trip.py index a963c4f8..67217f07 100644 --- a/test/integration/test_round_trip.py +++ b/test/integration/test_round_trip.py @@ -9,6 +9,7 @@ 3. JSON → HCL reconstruction (serialize + deserialize + format + reconstruct) 4. 
Full round-trip (HCL → JSON → HCL → JSON produces identical JSON) """ +# pylint: disable=C0103,C0114,C0115,C0116 import json from enum import Enum @@ -120,7 +121,10 @@ def test_hcl_to_json(self): class TestRoundTripReserialization(TestCase): - """Test JSON → JSON reserialization: parse HCL, serialize, deserialize, reserialize, compare with expected.""" + """Test JSON → JSON reserialization. + + Parse HCL, serialize, deserialize, reserialize, compare with expected. + """ maxDiff = None @@ -128,7 +132,9 @@ def test_json_reserialization(self): for suite in _get_suites(): with self.subTest(suite=suite): hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) - json_reserialized_path = _get_suite_file(suite, SuiteStep.JSON_RESERIALIZED) + json_reserialized_path = _get_suite_file( + suite, SuiteStep.JSON_RESERIALIZED + ) serialized = _parse_and_serialize(hcl_path.read_text()) actual = _deserialize_and_reserialize(serialized) @@ -142,7 +148,10 @@ def test_json_reserialization(self): class TestRoundTripReconstruction(TestCase): - """Test JSON → HCL reconstruction: parse HCL, serialize, deserialize, format, reconstruct, compare with expected HCL.""" + """Test JSON → HCL reconstruction. + + Parse HCL, serialize, deserialize, format, reconstruct, compare with expected HCL. + """ maxDiff = None diff --git a/test/integration/test_specialized.py b/test/integration/test_specialized.py index 6fc175ef..1415f307 100644 --- a/test/integration/test_specialized.py +++ b/test/integration/test_specialized.py @@ -4,22 +4,24 @@ (operator precedence, Builder round-trip) with dedicated golden files in test/integration/special/. 
""" +# pylint: disable=C0103,C0114,C0115,C0116 import json from pathlib import Path +from typing import Optional from unittest import TestCase -from hcl2.deserializer import BaseDeserializer, DeserializerOptions -from hcl2.formatter import BaseFormatter -from hcl2.reconstructor import HCLReconstructor -from hcl2.utils import SerializationOptions - from test.integration.test_round_trip import ( _parse_and_serialize, _deserialize_and_reserialize, _deserialize_and_reconstruct, ) +from hcl2.deserializer import BaseDeserializer, DeserializerOptions +from hcl2.formatter import BaseFormatter +from hcl2.reconstructor import HCLReconstructor +from hcl2.utils import SerializationOptions + SPECIAL_DIR = Path(__file__).absolute().parent / "specialized" @@ -82,7 +84,7 @@ def test_builder_reserialization(self): def _deserialize_and_reconstruct_with_options( serialized: dict, - deserializer_options: DeserializerOptions = None, + deserializer_options: Optional[DeserializerOptions] = None, ) -> str: """Deserialize a Python dict and reconstruct HCL text with custom options.""" deserializer = BaseDeserializer(deserializer_options) diff --git a/test/unit/cli/test_hcl_to_json.py b/test/unit/cli/test_hcl_to_json.py index 67c8c48f..0b40c896 100644 --- a/test/unit/cli/test_hcl_to_json.py +++ b/test/unit/cli/test_hcl_to_json.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 import json import os import tempfile @@ -8,7 +9,7 @@ from cli.hcl_to_json import main -SIMPLE_HCL = 'x = 1\n' +SIMPLE_HCL = "x = 1\n" SIMPLE_JSON_DICT = {"x": 1} @@ -23,7 +24,6 @@ def _read_file(path): class TestHclToJson(TestCase): - def test_single_file_to_stdout(self): with tempfile.TemporaryDirectory() as tmpdir: hcl_path = os.path.join(tmpdir, "test.tf") @@ -94,7 +94,7 @@ def test_with_meta_flag(self): self.assertIn("resource", result) def test_no_comments_flag(self): - hcl_with_comment = '# a comment\nx = 1\n' + hcl_with_comment = "# a comment\nx = 1\n" with tempfile.TemporaryDirectory() as tmpdir: 
hcl_path = os.path.join(tmpdir, "test.tf") _write_file(hcl_path, hcl_with_comment) @@ -108,7 +108,7 @@ def test_no_comments_flag(self): self.assertIn("comment", output) def test_wrap_objects_flag(self): - hcl_input = 'x = {\n a = 1\n}\n' + hcl_input = "x = {\n a = 1\n}\n" with tempfile.TemporaryDirectory() as tmpdir: hcl_path = os.path.join(tmpdir, "test.tf") _write_file(hcl_path, hcl_input) @@ -127,7 +127,7 @@ def test_wrap_objects_flag(self): self.assertNotEqual(default["x"], wrapped["x"]) def test_wrap_tuples_flag(self): - hcl_input = 'x = [1, 2]\n' + hcl_input = "x = [1, 2]\n" with tempfile.TemporaryDirectory() as tmpdir: hcl_path = os.path.join(tmpdir, "test.tf") _write_file(hcl_path, hcl_input) @@ -176,7 +176,6 @@ def test_invalid_path_raises_error(self): class TestSingleFileErrorHandling(TestCase): - def test_skip_error_with_output_file(self): with tempfile.TemporaryDirectory() as tmpdir: in_path = os.path.join(tmpdir, "test.tf") @@ -224,7 +223,6 @@ def test_raise_error_to_stdout(self): class TestDirectoryEdgeCases(TestCase): - def test_subdirectory_creation(self): with tempfile.TemporaryDirectory() as tmpdir: in_dir = os.path.join(tmpdir, "input") @@ -237,9 +235,7 @@ def test_subdirectory_creation(self): with patch("sys.argv", ["hcl2tojson", in_dir, out_dir]): main() - self.assertTrue( - os.path.exists(os.path.join(out_dir, "sub", "nested.json")) - ) + self.assertTrue(os.path.exists(os.path.join(out_dir, "sub", "nested.json"))) def test_directory_raise_error_without_skip(self): with tempfile.TemporaryDirectory() as tmpdir: diff --git a/test/unit/cli/test_helpers.py b/test/unit/cli/test_helpers.py index 6859d0ab..ee07ac96 100644 --- a/test/unit/cli/test_helpers.py +++ b/test/unit/cli/test_helpers.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 import os import tempfile from io import StringIO @@ -13,7 +14,6 @@ def _write_file(path, content): class TestConvertSingleFile(TestCase): - def test_does_not_close_stdout(self): """Regression test: 
stdout must not be closed after writing.""" with tempfile.TemporaryDirectory() as tmpdir: @@ -83,7 +83,6 @@ def convert(in_f, out_f): class TestConvertDirectory(TestCase): - def test_filters_by_extension(self): with tempfile.TemporaryDirectory() as tmpdir: in_dir = os.path.join(tmpdir, "input") @@ -100,8 +99,13 @@ def convert(in_f, out_f): converted_files.append(True) _convert_directory( - in_dir, out_dir, convert, False, (Exception,), - in_extensions={".tf"}, out_extension=".json", + in_dir, + out_dir, + convert, + False, + (Exception,), + in_extensions={".tf"}, + out_extension=".json", ) self.assertEqual(len(converted_files), 1) @@ -112,8 +116,13 @@ def test_requires_out_path(self): with tempfile.TemporaryDirectory() as tmpdir: with self.assertRaises(RuntimeError): _convert_directory( - tmpdir, None, lambda i, o: None, False, (Exception,), - in_extensions={".tf"}, out_extension=".json", + tmpdir, + None, + lambda i, o: None, + False, + (Exception,), + in_extensions={".tf"}, + out_extension=".json", ) def test_subdirectory_creation(self): @@ -129,13 +138,16 @@ def convert(in_f, out_f): out_f.write(in_f.read()) _convert_directory( - in_dir, out_dir, convert, False, (Exception,), - in_extensions={".tf"}, out_extension=".json", + in_dir, + out_dir, + convert, + False, + (Exception,), + in_extensions={".tf"}, + out_extension=".json", ) - self.assertTrue( - os.path.exists(os.path.join(out_dir, "sub", "nested.json")) - ) + self.assertTrue(os.path.exists(os.path.join(out_dir, "sub", "nested.json"))) def test_raise_error_without_skip(self): with tempfile.TemporaryDirectory() as tmpdir: @@ -150,13 +162,17 @@ def convert(in_f, out_f): with self.assertRaises(ValueError): _convert_directory( - in_dir, out_dir, convert, False, (ValueError,), - in_extensions={".tf"}, out_extension=".json", + in_dir, + out_dir, + convert, + False, + (ValueError,), + in_extensions={".tf"}, + out_extension=".json", ) class TestConvertStdin(TestCase): - def test_stdin_forward(self): stdout = 
StringIO() captured = [] @@ -166,8 +182,7 @@ def convert(in_f, out_f): captured.append(data) out_f.write("output") - with patch("sys.stdin", StringIO("input")), \ - patch("sys.stdout", stdout): + with patch("sys.stdin", StringIO("input")), patch("sys.stdout", stdout): _convert_stdin(convert) self.assertEqual(captured[0], "input") diff --git a/test/unit/cli/test_json_to_hcl.py b/test/unit/cli/test_json_to_hcl.py index 469d9188..dc9a1454 100644 --- a/test/unit/cli/test_json_to_hcl.py +++ b/test/unit/cli/test_json_to_hcl.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 import json import os import tempfile @@ -11,19 +12,7 @@ SIMPLE_JSON_DICT = {"x": 1} SIMPLE_JSON = json.dumps(SIMPLE_JSON_DICT) -BLOCK_JSON_DICT = { - "resource": [ - { - "aws_instance": [ - { - "example": [ - {"ami": "abc-123"} - ] - } - ] - } - ] -} +BLOCK_JSON_DICT = {"resource": [{"aws_instance": [{"example": [{"ami": "abc-123"}]}]}]} BLOCK_JSON = json.dumps(BLOCK_JSON_DICT) @@ -38,7 +27,6 @@ def _read_file(path): class TestJsonToHcl(TestCase): - def test_single_file_to_stdout(self): with tempfile.TemporaryDirectory() as tmpdir: json_path = os.path.join(tmpdir, "test.json") diff --git a/test/unit/rules/test_abstract.py b/test/unit/rules/test_abstract.py index 8803effc..3699ec0e 100644 --- a/test/unit/rules/test_abstract.py +++ b/test/unit/rules/test_abstract.py @@ -1,9 +1,10 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from lark import Token, Tree from lark.tree import Meta -from hcl2.rules.abstract import LarkElement, LarkToken, LarkRule +from hcl2.rules.abstract import LarkToken, LarkRule from hcl2.utils import SerializationOptions, SerializationContext diff --git a/test/unit/rules/test_base.py b/test/unit/rules/test_base.py index cfb6d666..bcf240a8 100644 --- a/test/unit/rules/test_base.py +++ b/test/unit/rules/test_base.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.const import IS_BLOCK @@ -5,7 
+6,15 @@ from hcl2.rules.expressions import ExpressionRule, ExprTermRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule, StringPartRule -from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE, DBLQUOTE, STRING_CHARS, NL_OR_COMMENT +from hcl2.rules.tokens import ( + NAME, + EQ, + LBRACE, + RBRACE, + DBLQUOTE, + STRING_CHARS, + NL_OR_COMMENT, +) from hcl2.rules.whitespace import NewLineOrCommentRule from hcl2.utils import SerializationOptions, SerializationContext diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py index 5ae28df4..196125dd 100644 --- a/test/unit/rules/test_containers.py +++ b/test/unit/rules/test_containers.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.containers import ( diff --git a/test/unit/rules/test_expressions.py b/test/unit/rules/test_expressions.py index 16800ed0..974885b5 100644 --- a/test/unit/rules/test_expressions.py +++ b/test/unit/rules/test_expressions.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.abstract import LarkRule @@ -9,14 +10,13 @@ BinaryOpRule, UnaryOpRule, ) -from hcl2.rules.literal_rules import BinaryOperatorRule, IdentifierRule +from hcl2.rules.literal_rules import BinaryOperatorRule from hcl2.rules.tokens import ( LPAR, RPAR, QMARK, COLON, BINARY_OP, - NAME, StringToken, ) from hcl2.utils import SerializationOptions, SerializationContext @@ -67,8 +67,8 @@ def _make_binary_term(op_str, rhs_value): return BinaryTermRule([_make_binary_operator(op_str), _make_expr_term(rhs_value)]) -MINUS_TOKEN = StringToken["MINUS"] -NOT_TOKEN = StringToken["NOT"] +MINUS_TOKEN = StringToken["MINUS"] # type: ignore[type-arg,name-defined] +NOT_TOKEN = StringToken["NOT"] # type: ignore[type-arg,name-defined] # --- ExprTermRule tests --- diff --git a/test/unit/rules/test_for_expressions.py b/test/unit/rules/test_for_expressions.py index 
febec643..0691d81c 100644 --- a/test/unit/rules/test_for_expressions.py +++ b/test/unit/rules/test_for_expressions.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.expressions import ExpressionRule @@ -45,26 +46,30 @@ def _make_identifier(name): def _make_for_intro_single(iter_name, iterable_value): """Build ForIntroRule with a single iterator: for iter_name in iterable :""" - return ForIntroRule([ - FOR(), - _make_identifier(iter_name), - IN(), - StubExpression(iterable_value), - COLON(), - ]) + return ForIntroRule( + [ + FOR(), + _make_identifier(iter_name), + IN(), + StubExpression(iterable_value), + COLON(), + ] + ) def _make_for_intro_dual(iter1_name, iter2_name, iterable_value): """Build ForIntroRule with dual iterators: for iter1, iter2 in iterable :""" - return ForIntroRule([ - FOR(), - _make_identifier(iter1_name), - COMMA(), - _make_identifier(iter2_name), - IN(), - StubExpression(iterable_value), - COLON(), - ]) + return ForIntroRule( + [ + FOR(), + _make_identifier(iter1_name), + COMMA(), + _make_identifier(iter2_name), + IN(), + StubExpression(iterable_value), + COLON(), + ] + ) def _make_for_cond(value): @@ -87,7 +92,9 @@ def test_first_iterator_single(self): def test_first_iterator_dual(self): i1 = _make_identifier("k") i2 = _make_identifier("v") - rule = ForIntroRule([FOR(), i1, COMMA(), i2, IN(), StubExpression("items"), COLON()]) + rule = ForIntroRule( + [FOR(), i1, COMMA(), i2, IN(), StubExpression("items"), COLON()] + ) self.assertIs(rule.first_iterator, i1) def test_second_iterator_none_when_single(self): @@ -96,15 +103,17 @@ def test_second_iterator_none_when_single(self): def test_second_iterator_present_when_dual(self): i2 = _make_identifier("v") - rule = ForIntroRule([ - FOR(), - _make_identifier("k"), - COMMA(), - i2, - IN(), - StubExpression("items"), - COLON(), - ]) + rule = ForIntroRule( + [ + FOR(), + _make_identifier("k"), + COMMA(), + i2, + IN(), + StubExpression("items"), + 
COLON(), + ] + ) self.assertIs(rule.second_iterator, i2) def test_iterable_property(self): @@ -160,61 +169,73 @@ def test_for_intro_property(self): def test_value_expr_property(self): value_expr = StubExpression("expr") - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - value_expr, - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + value_expr, + RSQB(), + ] + ) self.assertIs(rule.value_expr, value_expr) def test_condition_none(self): - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - StubExpression("expr"), - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ] + ) self.assertIsNone(rule.condition) def test_condition_present(self): cond = _make_for_cond("cond") - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - StubExpression("expr"), - cond, - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + cond, + RSQB(), + ] + ) self.assertIsInstance(rule.condition, ForCondRule) self.assertIs(rule.condition, cond) def test_serialize_without_condition(self): - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - StubExpression("expr"), - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ] + ) self.assertEqual(rule.serialize(), "${[for v in items : expr]}") def test_serialize_with_condition(self): - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - StubExpression("expr"), - _make_for_cond("cond"), - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + _make_for_cond("cond"), + RSQB(), + ] + ) self.assertEqual(rule.serialize(), "${[for v in items : expr if cond]}") def 
test_serialize_inside_dollar_string(self): - rule = ForTupleExprRule([ - LSQB(), - _make_for_intro_single("v", "items"), - StubExpression("expr"), - RSQB(), - ]) + rule = ForTupleExprRule( + [ + LSQB(), + _make_for_intro_single("v", "items"), + StubExpression("expr"), + RSQB(), + ] + ) ctx = SerializationContext(inside_dollar_string=True) self.assertEqual(rule.serialize(context=ctx), "[for v in items : expr]") @@ -228,124 +249,144 @@ def test_lark_name(self): def test_for_intro_property(self): intro = _make_for_intro_dual("k", "v", "items") - rule = ForObjectExprRule([ - LBRACE(), - intro, - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + intro, + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ] + ) self.assertIs(rule.for_intro, intro) def test_key_expr_property(self): key_expr = StubExpression("key") - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - key_expr, - FOR_OBJECT_ARROW(), - StubExpression("value"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + key_expr, + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ] + ) self.assertIs(rule.key_expr, key_expr) def test_value_expr_property(self): value_expr = StubExpression("value") - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - value_expr, - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + value_expr, + RBRACE(), + ] + ) self.assertIs(rule.value_expr, value_expr) def test_ellipsis_none(self): - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + 
LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ] + ) self.assertIsNone(rule.ellipsis) def test_ellipsis_present(self): ellipsis = ELLIPSIS() - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - ellipsis, - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + ellipsis, + RBRACE(), + ] + ) self.assertIs(rule.ellipsis, ellipsis) def test_condition_none(self): - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ] + ) self.assertIsNone(rule.condition) def test_condition_present(self): cond = _make_for_cond("cond") - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - cond, - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + cond, + RBRACE(), + ] + ) self.assertIsInstance(rule.condition, ForCondRule) self.assertIs(rule.condition, cond) def test_serialize_basic(self): - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + RBRACE(), + ] + ) 
self.assertEqual(rule.serialize(), "${{for k, v in items : key => value}}") def test_serialize_with_ellipsis(self): - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - ELLIPSIS(), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + ELLIPSIS(), + RBRACE(), + ] + ) result = rule.serialize() self.assertIn("...", result) self.assertEqual(result, "${{for k, v in items : key => value...}}") def test_serialize_with_condition(self): - rule = ForObjectExprRule([ - LBRACE(), - _make_for_intro_dual("k", "v", "items"), - StubExpression("key"), - FOR_OBJECT_ARROW(), - StubExpression("value"), - _make_for_cond("cond"), - RBRACE(), - ]) + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + StubExpression("value"), + _make_for_cond("cond"), + RBRACE(), + ] + ) result = rule.serialize() self.assertIn("if cond", result) self.assertEqual(result, "${{for k, v in items : key => value if cond}}") diff --git a/test/unit/rules/test_functions.py b/test/unit/rules/test_functions.py index 4c4b336e..18a763bd 100644 --- a/test/unit/rules/test_functions.py +++ b/test/unit/rules/test_functions.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.expressions import ExpressionRule diff --git a/test/unit/rules/test_literal_rules.py b/test/unit/rules/test_literal_rules.py index f8513c21..9a834e14 100644 --- a/test/unit/rules/test_literal_rules.py +++ b/test/unit/rules/test_literal_rules.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.literal_rules import ( diff --git a/test/unit/rules/test_strings.py b/test/unit/rules/test_strings.py index e7fb28f1..9abf7eb6 100644 --- 
a/test/unit/rules/test_strings.py +++ b/test/unit/rules/test_strings.py @@ -1,3 +1,4 @@ +# pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase from hcl2.rules.expressions import ExpressionRule @@ -201,7 +202,7 @@ def test_serialize_no_preserve_escapes_quotes(self): self.assertEqual(result, '"say \\"hello\\""') def test_serialize_no_preserve_escapes_backslashes(self): - token = HEREDOC_TEMPLATE('< Date: Sat, 7 Mar 2026 21:09:52 +0100 Subject: [PATCH 34/42] update docs --- README.md | 58 +++++-- docs/usage.md | 306 +++++++++++++++++++++++++++++++++ tree-to-hcl2-reconstruction.md | 248 -------------------------- 3 files changed, 354 insertions(+), 258 deletions(-) create mode 100644 docs/usage.md delete mode 100644 tree-to-hcl2-reconstruction.md diff --git a/README.md b/README.md index 1ff75876..c93f1e07 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ [![Codacy Badge](https://app.codacy.com/project/badge/Grade/2e2015f9297346cbaa788c46ab957827)](https://app.codacy.com/gh/amplify-education/python-hcl2/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) -[![Build Status](https://travis-ci.org/amplify-education/python-hcl2.svg?branch=master)](https://travis-ci.org/amplify-education/python-hcl2) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/amplify-education/python-hcl2/master/LICENSE) [![PyPI](https://img.shields.io/pypi/v/python-hcl2.svg)](https://pypi.org/project/python-hcl2/) [![Python Versions](https://img.shields.io/pypi/pyversions/python-hcl2.svg)](https://pypi.python.org/pypi/python-hcl2) @@ -36,19 +35,58 @@ pip3 install python-hcl2 ### Usage +**HCL2 to Python dict:** + ```python import hcl2 -with open('foo.tf', 'r') as file: - dict = hcl2.load(file) + +with open("main.tf") as f: + data = hcl2.load(f) ``` -### Parse Tree to HCL2 reconstruction +**Python dict to HCL2:** + +```python +import hcl2 + +hcl_string = hcl2.dumps(data) + +with open("output.tf", "w") 
as f: + hcl2.dump(data, f) +``` -With version 6.x the possibility of HCL2 reconstruction from the Lark Parse Tree and Python dictionaries directly was introduced. +**Building HCL from scratch:** -Documentation and an example of manipulating Lark Parse Tree and reconstructing it back into valid HCL2 can be found in [tree-to-hcl2-reconstruction.md](https://github.com/amplify-education/python-hcl2/blob/main/tree-to-hcl2-reconstruction.md) file. +```python +import hcl2 + +doc = hcl2.Builder() +res = doc.block("resource", labels=["aws_instance", "web"], ami="abc-123", instance_type="t2.micro") +res.block("tags", Name="HelloWorld") + +hcl_string = hcl2.dumps(doc.build()) +``` + +For the full API reference, option dataclasses, intermediate pipeline stages, and more examples +see [docs/usage.md](https://github.com/amplify-education/python-hcl2/blob/main/docs/usage.md). + +### CLI Tools + +python-hcl2 ships two command-line converters: + +```sh +# HCL2 → JSON +hcl2tojson main.tf # prints JSON to stdout +hcl2tojson main.tf output.json # writes to file +hcl2tojson terraform/ output/ # converts a directory + +# JSON → HCL2 +jsontohcl2 output.json # prints HCL2 to stdout +jsontohcl2 output.json main.tf # writes to file +jsontohcl2 output/ terraform/ # converts a directory +``` -More details about reconstruction implementation can be found in PRs #169 and #177. +Both commands accept `-` as PATH to read from stdin. Run `hcl2tojson --help` or `jsontohcl2 --help` for the full list of flags. ## Building From Source @@ -61,7 +99,7 @@ Running `tox` will automatically execute linters as well as the unit tests. You can also run them individually with the `-e` argument. -For example, `tox -e py37-unit` will run the unit tests for python 3.7 +For example, `tox -e py310-unit` will run the unit tests for python 3.10 To see all the available options, run `tox -l`. @@ -81,9 +119,9 @@ You can reach us at We welcome pull requests! 
For your pull request to be accepted smoothly, we suggest that you: - For any sizable change, first open a GitHub issue to discuss your idea. -- Create a pull request. Explain why you want to make the change and what it’s for. +- Create a pull request. Explain why you want to make the change and what it's for. -We’ll try to answer any PR’s promptly. +We'll try to answer any PRs promptly. ## Limitations diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 00000000..f6a5f6d6 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,306 @@ +# python-hcl2 Usage Guide + +## Quick Reference + +| Function | Description | +|---|---| +| `hcl2.load(file)` | Parse an HCL2 file to a Python dict | +| `hcl2.loads(text)` | Parse an HCL2 string to a Python dict | +| `hcl2.dump(data, file)` | Write a Python dict as HCL2 to a file | +| `hcl2.dumps(data)` | Convert a Python dict to an HCL2 string | +| `hcl2.parse(file)` | Parse an HCL2 file to a LarkElement tree | +| `hcl2.parses(text)` | Parse an HCL2 string to a LarkElement tree | +| `hcl2.parse_to_tree(file)` | Parse an HCL2 file to a raw Lark tree | +| `hcl2.parses_to_tree(text)` | Parse an HCL2 string to a raw Lark tree | +| `hcl2.transform(lark_tree)` | Transform a raw Lark tree into a LarkElement tree | +| `hcl2.serialize(tree)` | Serialize a LarkElement tree to a Python dict | +| `hcl2.from_dict(data)` | Convert a Python dict into a LarkElement tree | +| `hcl2.from_json(text)` | Convert a JSON string into a LarkElement tree | +| `hcl2.reconstruct(tree)` | Convert a LarkElement tree (or Lark tree) to HCL2 text | +| `hcl2.Builder()` | Build HCL documents programmatically | + +## HCL to Python dict + +Use `load` / `loads` to parse HCL2 into a Python dictionary: + +```python +import hcl2 + +with open("main.tf") as f: + data = hcl2.load(f) + +# or from a string +data = hcl2.loads('resource "aws_instance" "web" { ami = "abc-123" }') +``` + +### SerializationOptions + +Pass `serialization_options` to control how the dict is
produced: + +```python +from hcl2 import loads, SerializationOptions + +data = loads(text, serialization_options=SerializationOptions( + with_meta=True, + wrap_objects=True, +)) +``` + +| Field | Type | Default | Description | +|---|---|---|---| +| `with_comments` | `bool` | `True` | Include comments in the output | +| `with_meta` | `bool` | `False` | Add `__start_line__` / `__end_line__` metadata | +| `wrap_objects` | `bool` | `False` | Wrap object values as inline HCL2 strings | +| `wrap_tuples` | `bool` | `False` | Wrap tuple values as inline HCL2 strings | +| `explicit_blocks` | `bool` | `True` | Add `__is_block__: True` markers to blocks | +| `preserve_heredocs` | `bool` | `True` | Keep heredocs in their original form | +| `force_operation_parentheses` | `bool` | `False` | Force parentheses around all operations | +| `preserve_scientific_notation` | `bool` | `True` | Keep scientific notation as-is | + +## Python dict to HCL + +Use `dump` / `dumps` to convert a Python dictionary back into HCL2 text: + +```python +import hcl2 + +hcl_string = hcl2.dumps(data) + +with open("output.tf", "w") as f: + hcl2.dump(data, f) +``` + +### DeserializerOptions + +Control how the dict is interpreted when building the LarkElement tree: + +```python +from hcl2 import dumps, DeserializerOptions + +text = dumps(data, deserializer_options=DeserializerOptions( + object_elements_colon=True, +)) +``` + +| Field | Type | Default | Description | +|---|---|---|---| +| `heredocs_to_strings` | `bool` | `False` | Convert heredocs to plain strings | +| `strings_to_heredocs` | `bool` | `False` | Convert strings with `\n` to heredocs | +| `object_elements_colon` | `bool` | `False` | Use `:` instead of `=` in object elements | +| `object_elements_trailing_comma` | `bool` | `True` | Add trailing commas in object elements | + +### FormatterOptions + +Control whitespace and alignment in the generated HCL2: + +```python +from hcl2 import dumps, FormatterOptions + +text = dumps(data, 
formatter_options=FormatterOptions( + indent_length=4, + vertically_align_attributes=False, +)) +``` + +| Field | Type | Default | Description | +|---|---|---|---| +| `indent_length` | `int` | `2` | Number of spaces per indentation level | +| `open_empty_blocks` | `bool` | `True` | Expand empty blocks across multiple lines | +| `open_empty_objects` | `bool` | `True` | Expand empty objects across multiple lines | +| `open_empty_tuples` | `bool` | `False` | Expand empty tuples across multiple lines | +| `vertically_align_attributes` | `bool` | `True` | Vertically align `=` signs in attribute groups | +| `vertically_align_object_elements` | `bool` | `True` | Vertically align `=` signs in object elements | + +## Building HCL from scratch + +The `Builder` class produces dicts with the correct `__is_block__` markers so that `dumps` can distinguish blocks from plain objects: + +```python +import hcl2 + +doc = hcl2.Builder() +res = doc.block("resource", labels=["aws_instance", "web"], + ami="abc-123", instance_type="t2.micro") +res.block("tags", Name="HelloWorld") + +hcl_string = hcl2.dumps(doc.build()) +``` + +Output: + +```hcl +resource "aws_instance" "web" { + ami = "abc-123" + instance_type = "t2.micro" + + tags { + Name = "HelloWorld" + } +} +``` + +### Builder.block() + +```python +block( + block_type: str, + labels: Optional[List[str]] = None, + __nested_builder__: Optional[Builder] = None, + **attributes, +) -> Builder +``` + +Returns the child `Builder` for the new block, allowing chained calls. + +## Intermediate pipeline stages + +The full pipeline looks like this: + +``` +Forward: HCL2 Text → Lark Parse Tree → LarkElement Tree → Python Dict +Reverse: Python Dict → LarkElement Tree → HCL2 Text +``` + +You can access each stage individually for advanced use cases. 
+ +### parse / parses — HCL2 text to LarkElement tree + +```python +tree = hcl2.parses('x = 1') # StartRule +tree = hcl2.parse(open("main.tf")) # StartRule +``` + +Pass `discard_comments=True` to strip comments during transformation. + +### parse_to_tree / parses_to_tree — HCL2 text to raw Lark tree + +```python +lark_tree = hcl2.parses_to_tree('x = 1') # lark.Tree +``` + +### transform — raw Lark tree to LarkElement tree + +```python +lark_tree = hcl2.parses_to_tree('x = 1') +tree = hcl2.transform(lark_tree) # StartRule +``` + +### serialize — LarkElement tree to Python dict + +```python +tree = hcl2.parses('x = 1') +data = hcl2.serialize(tree) +# or with options: +from hcl2 import SerializationOptions +data = hcl2.serialize(tree, serialization_options=SerializationOptions(with_meta=True)) +``` + +### from_dict / from_json — Python dict or JSON to LarkElement tree + +```python +tree = hcl2.from_dict(data) # StartRule +tree = hcl2.from_json('{"x": 1}') # StartRule +``` + +Both accept optional `deserializer_options`, `formatter_options`, and `apply_format` (default `True`). + +### reconstruct — LarkElement tree (or Lark tree) to HCL2 text + +```python +tree = hcl2.from_dict(data) +text = hcl2.reconstruct(tree) +``` + +## CLI Tools + +### hcl2tojson + +Convert HCL2 files to JSON. 
+ +```sh +hcl2tojson main.tf # print JSON to stdout +hcl2tojson main.tf output.json # write to file +hcl2tojson terraform/ output/ # convert a directory +cat main.tf | hcl2tojson - # read from stdin +``` + +**Flags:** + +| Flag | Description | +|---|---| +| `-s` | Skip un-parsable files | +| `--json-indent N` | JSON indentation width (default: 2) | +| `--with-meta` | Add `__start_line__` / `__end_line__` metadata | +| `--with-comments` | Include comments in the output | +| `--wrap-objects` | Wrap object values as inline HCL2 | +| `--wrap-tuples` | Wrap tuple values as inline HCL2 | +| `--no-explicit-blocks` | Disable `__is_block__` markers | +| `--no-preserve-heredocs` | Convert heredocs to plain strings | +| `--force-parens` | Force parentheses around all operations | +| `--no-preserve-scientific` | Convert scientific notation to standard floats | +| `--version` | Show version and exit | + +### jsontohcl2 + +Convert JSON files to HCL2. + +```sh +jsontohcl2 output.json # print HCL2 to stdout +jsontohcl2 output.json main.tf # write to file +jsontohcl2 output/ terraform/ # convert a directory +cat output.json | jsontohcl2 - # read from stdin +``` + +**Flags:** + +| Flag | Description | +|---|---| +| `-s` | Skip un-parsable files | +| `--indent N` | Indentation width (default: 2) | +| `--colon-separator` | Use `:` instead of `=` in object elements | +| `--no-trailing-comma` | Omit trailing commas in object elements | +| `--heredocs-to-strings` | Convert heredocs to plain strings | +| `--strings-to-heredocs` | Convert strings with escaped newlines to heredocs | +| `--no-open-empty-blocks` | Collapse empty blocks to a single line | +| `--no-open-empty-objects` | Collapse empty objects to a single line | +| `--open-empty-tuples` | Expand empty tuples across multiple lines | +| `--no-align` | Disable vertical alignment of attributes and object elements | +| `--version` | Show version and exit | + +## Pipeline Diagram + +``` + Forward Pipeline + ================ + HCL2 
Text + │ + ▼ + ┌──────────────────┐ parse_to_tree / parses_to_tree + │ Lark Parse Tree │ + └────────┬─────────┘ + │ transform + ▼ + ┌──────────────────┐ + │ LarkElement Tree │ parse / parses (shortcut: HCL2 text → here) + └────────┬─────────┘ + │ serialize + ▼ + ┌──────────────────┐ + │ Python Dict │ load / loads (shortcut: HCL2 text → here) + └──────────────────┘ + + + Reverse Pipeline + ================ + Python Dict / JSON + │ + ▼ + ┌──────────────────┐ from_dict / from_json + │ LarkElement Tree │ + └────────┬─────────┘ + │ reconstruct + ▼ + ┌──────────────────┐ + │ HCL2 Text │ dump / dumps (shortcut: Python Dict / JSON → here) + └──────────────────┘ +``` diff --git a/tree-to-hcl2-reconstruction.md b/tree-to-hcl2-reconstruction.md deleted file mode 100644 index 1a5f83dc..00000000 --- a/tree-to-hcl2-reconstruction.md +++ /dev/null @@ -1,248 +0,0 @@ -# Writing HCL2 from Python - -Version 6 of this library supports reconstructing HCL files directly from -Python. This guide details how the reconstruction process takes place. 
See -also: [Limitations](#limitations) - -There are three major phases: - -- [Building a Python Dictionary](#building-a-python-dictionary) -- [Building an AST](#building-an-ast) -- [Reconstructing the file from the AST](#reconstructing-the-file-from-the-ast) - -## Example - -To create the `example.tf` file with the following content: - -```terraform -resource "aws_s3_bucket" "bucket" { - bucket = "bucket_id" - force_destroy = true -} -``` - -You can use the `hcl2.Builder` class like so: - -```python -import hcl2 - -example = hcl2.Builder() - -example.block( - "resource", - ["aws_s3_bucket", "bucket"], - bucket="bucket_id", - force_destroy=True, -) - -example_dict = example.build() -example_ast = hcl2.reverse_transform(example_dict) -example_file = hcl2.writes(example_ast) - -print(example_file) -# resource "aws_s3_bucket" "bucket" { -# bucket = "bucket_id" -# force_destroy = true -# } -# -``` - -This demonstrates a couple of different phases of the process worth mentioning. - -### Building a Python dictionary - -The `hcl2.Builder` class produces a dictionary that should be identical to the -output of `hcl2.load(example_file, with_meta=True)`. The `with_meta` keyword -argument is important here. HCL "blocks" in the Python dictionary are -identified by the presence of `__start_line__` and `__end_line__` metadata -within them. The `Builder` class handles adding that metadata. If that metadata -is missing, the `hcl2.reconstructor.HCLReverseTransformer` class fails to -identify what is a block and what is just an attribute with an object value. 
-Without that metadata, this dictionary: - -```python -{ - "resource": [ - { - "aws_s3_bucket": { - "bucket": { - "bucket": "bucket_id", - "force_destroy": True, - # "__start_line__": -1, - # "__end_line__": -1, - } - } - } - ] -} -``` - -Would produce this HCL output: - -```terraform -resource = [{ - aws_s3_bucket = { - bucket = { - bucket = "bucket_id" - force_destroy = true - } - } -}] -``` - -(This output parses to the same datastructure, but isn't formatted in blocks -as desired by the user. Therefore, using the `Builder` class is recommended.) - -### Building an AST - -The `hcl2.reconstructor.HCLReconstructor` class operates on an "abstract -syntax tree" (`hcl2.AST` or `Lark.Tree`, they're the same.) To produce this AST -from scratch in Python, use `hcl2.reverse_transform(hcl_dict)`, and to produce -this AST from an existing HCL file, use `hcl2.parse(hcl_file)`. - -You can also build these ASTs manually, if you want more control over the -generated HCL output. If you do this, though, make sure the AST you generate is -valid within the `hcl2.lark` grammar. - -Here's an example, which would add a "tags" element to that `example.tf` file -mentioned above. 
- -```python -from copy import deepcopy -from lark import Token, Tree -import hcl2 - - -def build_tags_tree(base_indent: int = 0) -> Tree: - # build Tree representing following HCL2 structure - # tags = { - # Name = "My bucket" - # Environment = "Dev" - # } - return Tree('attribute', [ - Tree('identifier', [ - Token('NAME', 'tags') - ]), - Token('EQ', '='), - Tree('expr_term', [ - Tree('object', [ - Tree('new_line_or_comment', [ - Token('NL_OR_COMMENT', '\n' + ' ' * (base_indent + 1)), - ]), - Tree('object_elem', [ - Tree('identifier', [ - Token('NAME', 'Name') - ]), - Token('EQ', '='), - Tree('expr_term', [ - Token('STRING_LIT', '"My bucket"') - ]) - ]), - Tree('new_line_and_or_comma', [ - Tree('new_line_or_comment', [ - Token('NL_OR_COMMENT', '\n' + ' ' * (base_indent + 1)), - ]), - ]), - Tree('object_elem', [ - Tree('identifier', [ - Token('NAME', 'Environment') - ]), - Token('EQ', '='), - Tree('expr_term', [ - Token('STRING_LIT', '"Dev"') - ]) - ]), - Tree('new_line_and_or_comma', [ - Tree('new_line_or_comment', [ - Token('NL_OR_COMMENT', '\n' + ' ' * base_indent), - ]), - ]), - ]), - ]) - ]) - - -def is_bucket_block(tree: Tree) -> bool: - # check whether given Tree represents `resource "aws_s3_bucket" "bucket"` - try: - return tree.data == 'block' and tree.children[2].value == '"bucket"' - except IndexError: - return False - - -def insert_tags(tree: Tree, indent: int = 0) -> Tree: - # Insert tags tree and adjust surrounding whitespaces to match indentation - new_children = [*tree.children.copy(), build_tags_tree(indent)] - # add indentation before tags tree - new_children[len(tree.children) - 1] = Tree('new_line_or_comment', [ - Token('NL_OR_COMMENT', '\n ') - ]) - # move closing bracket to the new line - new_children.append( - Tree('new_line_or_comment', [ - Token('NL_OR_COMMENT', '\n') - ]) - ) - return Tree(tree.data, new_children) - - -def process_token(node: Token, indent=0): - # Print details of this token and return its copy - print(f'[{indent}] 
(token)\t|', ' ' * indent, node.type, node.value) - return deepcopy(node) - - -def process_tree(node: Tree, depth=0) -> Tree: - # Recursively iterate over tree's children - # the depth parameter represents recursion depth, - # it's used to deduce indentation for printing tree and for adjusting whitespace after adding tags - new_children = [] - print(f'[{depth}] (tree)\t|', ' ' * depth, node.data) - for child in node.children: - if isinstance(child, Tree): - if is_bucket_block(child): - block_children = child.children.copy() - # this child is the Tree representing block's actual body - block_children[3] = insert_tags(block_children[3], depth) - # replace original Tree with new one including the modified body - child = Tree(child.data, block_children) - - new_children.append(process_tree(child, depth + 1)) - - else: - new_children.append(process_token(child, depth + 1)) - - return Tree(node.data, new_children) - - -def main(): - tree = hcl2.parse(open('example.tf')) - new_tree = process_tree(tree) - reconstructed = hcl2.writes(new_tree) - open('example_reconstructed.tf', 'w').write(reconstructed) - - -if __name__ == "__main__": - main() - -``` - -### Reconstructing the file from the AST - -Once the AST has been generated, you can convert it back to valid HCL using -`hcl2.writes(ast)`. In the above example, that conversion is done in the -`main()` function. - -## Limitations - -- Some formatting choices are impossible to specify via `hcl2.Builder()` and - require manual intervention of the AST produced after the `reverse_transform` - step. - -- Most notably, this means it's not possible to generate files containing - comments (both inline and block comments) - -- Even when parsing a file directly and writing it back out, some formatting - information may be lost due to Terminals discarded during the parsing process. - The reconstructed output should still parse to the same dictionary at the end - of the day though. 
From 4a65479fdc406a07892364e51bc270aa0f2c26af Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 22:11:03 +0100 Subject: [PATCH 35/42] fix the limitation of using expressions as object keys --- README.md | 13 +--- hcl2/deserializer.py | 18 +---- hcl2/hcl2.lark | 4 +- hcl2/rules/containers.py | 57 ++++----------- hcl2/transformer.py | 26 +++---- test/integration/hcl2_original/object_keys.tf | 3 + .../hcl2_reconstructed/object_keys.tf | 3 + test/integration/hcl2_reconstructed/smoke.tf | 8 +-- .../json_reserialized/object_keys.json | 5 +- test/integration/json_reserialized/smoke.json | 2 +- .../json_serialized/object_keys.json | 5 +- test/integration/json_serialized/smoke.json | 2 +- test/unit/rules/test_containers.py | 70 ++++--------------- test/unit/test_deserializer.py | 17 +++-- 14 files changed, 70 insertions(+), 163 deletions(-) diff --git a/README.md b/README.md index c93f1e07..c848ef0f 100644 --- a/README.md +++ b/README.md @@ -125,15 +125,4 @@ We'll try to answer any PR's promptly. ## Limitations -### Using inline expression as an object key - -- Object key can be an expression as long as it is wrapped in parentheses: - ```terraform - locals { - foo = "bar" - baz = { - (format("key_prefix_%s", local.foo)) : "value" - # format("key_prefix_%s", local.foo) : "value" this will fail - } - } - ``` +None that are known. 
diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 03dac5d8..ae6cbc15 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -22,7 +22,6 @@ ObjectRule, ObjectElemRule, ObjectElemKeyExpressionRule, - ObjectElemKeyDotAccessor, ObjectElemKeyRule, ) from hcl2.rules.expressions import ExprTermRule @@ -51,7 +50,6 @@ RSQB, LSQB, COMMA, - DOT, LBRACE, HEREDOC_TRIM_TEMPLATE, HEREDOC_TEMPLATE, @@ -320,20 +318,8 @@ def _deserialize_object(self, value: dict) -> ObjectRule: def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: if self._is_expression(key): - key = ObjectElemKeyExpressionRule( - [ - child - for child in self._deserialize_expression(key).children - if child is not None - ] - ) - elif isinstance(key, str) and "." in key: - parts = key.split(".") - dot_children: List[Any] = [] - for part in parts: - dot_children.append(self._deserialize_identifier(part)) - dot_children.append(DOT()) - key = ObjectElemKeyDotAccessor(dot_children[:-1]) # without the last dot + expr = self._deserialize_expression(key) + key = ObjectElemKeyExpressionRule([expr]) else: key = self._deserialize_text(key) diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index 4a9f1ec6..f0248290 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -177,9 +177,7 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? 
COMMA)) new_line_or_comment?)* RBRACE object_elem : object_elem_key ( EQ | COLON ) expression -object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression -object_elem_key_expression : LPAR expression RPAR -object_elem_key_dot_accessor : identifier (DOT identifier)+ +object_elem_key : expression // Heredocs heredoc_template : HEREDOC_TEMPLATE diff --git a/hcl2/rules/containers.py b/hcl2/rules/containers.py index 78e0bdeb..1ca5f69b 100644 --- a/hcl2/rules/containers.py +++ b/hcl2/rules/containers.py @@ -18,9 +18,6 @@ RBRACE, LSQB, RSQB, - LPAR, - RPAR, - DOT, ) from hcl2.rules.whitespace import ( NewLineOrCommentRule, @@ -114,65 +111,37 @@ def serialize( class ObjectElemKeyExpressionRule(LarkRule): - """Rule for parenthesized expression keys in objects.""" + """Rule for expression keys in objects (bare or parenthesized). - _children_layout: Tuple[ - LPAR, - ExpressionRule, - RPAR, - ] + Holds a single ExpressionRule child. Parenthesized keys like + ``(var.account)`` arrive as an ExprTermRule whose own ``serialize()`` + already emits the surrounding ``(…)``, so this class does not need + separate handling for bare vs parenthesized forms. 
+ """ + + _children_layout: Tuple[ExpressionRule] @staticmethod def lark_name() -> str: """Return the grammar rule name.""" - return "object_elem_key_expression" + return "object_elem_key" @property def expression(self) -> ExpressionRule: - """Return the parenthesized key expression.""" - return self._children[1] + """Return the key expression.""" + return self._children[0] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - """Serialize to '(expression)' string.""" + """Serialize to '${expression}' string.""" with context.modify(inside_dollar_string=True): - result = f"({self.expression.serialize(options, context)})" + result = str(self.expression.serialize(options, context)) if not context.inside_dollar_string: result = to_dollar_string(result) return result -class ObjectElemKeyDotAccessor(LarkRule): - """Rule for dot-accessor keys in objects (e.g. a.b.c).""" - - _children_layout: Tuple[ - IdentifierRule, - Tuple[ - IdentifierRule, - DOT, - ], - ] - - @staticmethod - def lark_name() -> str: - """Return the grammar rule name.""" - return "object_elem_key_dot_accessor" - - @property - def identifiers(self) -> List[IdentifierRule]: - """Return the chain of identifiers.""" - return [child for child in self._children if isinstance(child, IdentifierRule)] - - def serialize( - self, options=SerializationOptions(), context=SerializationContext() - ) -> Any: - """Serialize to 'a.b.c' string.""" - return ".".join( - identifier.serialize(options, context) for identifier in self.identifiers - ) - - class ObjectElemRule(LarkRule): """Rule for a single key = value element in an object.""" diff --git a/hcl2/transformer.py b/hcl2/transformer.py index aebf9d1f..d483cd90 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -15,7 +15,6 @@ ObjectElemKeyRule, TupleRule, ObjectElemKeyExpressionRule, - ObjectElemKeyDotAccessor, ) from hcl2.rules.expressions import ( BinaryTermRule, @@ -198,20 +197,17 @@ def object_elem(self, meta: 
Meta, args) -> ObjectElemRule: return ObjectElemRule(args, meta) @v_args(meta=True) - def object_elem_key(self, meta: Meta, args) -> ObjectElemKeyRule: - return ObjectElemKeyRule(args, meta) - - @v_args(meta=True) - def object_elem_key_expression( - self, meta: Meta, args - ) -> ObjectElemKeyExpressionRule: - return ObjectElemKeyExpressionRule(args, meta) - - @v_args(meta=True) - def object_elem_key_dot_accessor( - self, meta: Meta, args - ) -> ObjectElemKeyDotAccessor: - return ObjectElemKeyDotAccessor(args, meta) + def object_elem_key(self, meta: Meta, args): + expr = args[0] + # Simple literals (identifier, string, int, float) wrapped in ExprTermRule + if isinstance(expr, ExprTermRule) and len(expr.children) == 5: + inner = expr.children[2] # position 2 in [None, None, inner, None, None] + if isinstance( + inner, (IdentifierRule, StringRule, IntLitRule, FloatLitRule) + ): + return ObjectElemKeyRule([inner], meta) + # Any other expression (parenthesized or bare) + return ObjectElemKeyExpressionRule([expr], meta) @v_args(meta=True) def arguments(self, meta: Meta, args) -> ArgumentsRule: diff --git a/test/integration/hcl2_original/object_keys.tf b/test/integration/hcl2_original/object_keys.tf index 913d5a42..c3f33146 100644 --- a/test/integration/hcl2_original/object_keys.tf +++ b/test/integration/hcl2_original/object_keys.tf @@ -5,4 +5,7 @@ bar = { (var.account) : 3 (format("key_prefix_%s", local.foo)) : 4 "prefix_${var.account}:${var.user}_suffix": 5, + 1 + 1 = "two", + (2 + 2) = "four", + format("key_%s", var.name) = "dynamic" } diff --git a/test/integration/hcl2_reconstructed/object_keys.tf b/test/integration/hcl2_reconstructed/object_keys.tf index 497e65a6..6d20581c 100644 --- a/test/integration/hcl2_reconstructed/object_keys.tf +++ b/test/integration/hcl2_reconstructed/object_keys.tf @@ -5,4 +5,7 @@ bar = { (var.account) = 3, (format("key_prefix_%s", local.foo)) = 4, "prefix_${var.account}:${var.user}_suffix" = 5, + 1 + 1 = "two", + (2 + 2) = "four", + 
format("key_%s", var.name) = "dynamic", } diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 970cc1cf..c0358021 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -7,12 +7,12 @@ block label1 label2 { f = "${"this is a string"}" g = 1 == 2 h = { - k1 = 5, - k2 = 10, - "k3" = { + k1 = 5, + k2 = 10, + "k3" = { k4 = "a", }, - (5 + 5) = "d", + (5 + 5) = "d", k5.attr.attr = "e", } i = [ diff --git a/test/integration/json_reserialized/object_keys.json b/test/integration/json_reserialized/object_keys.json index 8acccdea..3146aa52 100644 --- a/test/integration/json_reserialized/object_keys.json +++ b/test/integration/json_reserialized/object_keys.json @@ -5,6 +5,9 @@ "baz": 2, "${(var.account)}": 3, "${(format(\"key_prefix_%s\", local.foo))}": 4, - "\"prefix_${var.account}:${var.user}_suffix\"": 5 + "\"prefix_${var.account}:${var.user}_suffix\"": 5, + "${1 + 1}": "\"two\"", + "${(2 + 2)}": "\"four\"", + "${format(\"key_%s\", var.name)}": "\"dynamic\"" } } diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index 5bcc702a..dbff114f 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -17,7 +17,7 @@ "k4": "\"a\"" }, "${(5 + 5)}": "\"d\"", - "k5.attr.attr": "\"e\"" + "${k5.attr.attr}": "\"e\"" }, "i": [ "a", diff --git a/test/integration/json_serialized/object_keys.json b/test/integration/json_serialized/object_keys.json index 8acccdea..3146aa52 100644 --- a/test/integration/json_serialized/object_keys.json +++ b/test/integration/json_serialized/object_keys.json @@ -5,6 +5,9 @@ "baz": 2, "${(var.account)}": 3, "${(format(\"key_prefix_%s\", local.foo))}": 4, - "\"prefix_${var.account}:${var.user}_suffix\"": 5 + "\"prefix_${var.account}:${var.user}_suffix\"": 5, + "${1 + 1}": "\"two\"", + "${(2 + 2)}": "\"four\"", + "${format(\"key_%s\", 
var.name)}": "\"dynamic\"" } } diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index 5bcc702a..dbff114f 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -17,7 +17,7 @@ "k4": "\"a\"" }, "${(5 + 5)}": "\"d\"", - "k5.attr.attr": "\"e\"" + "${k5.attr.attr}": "\"e\"" }, "i": [ "a", diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py index 196125dd..0231987d 100644 --- a/test/unit/rules/test_containers.py +++ b/test/unit/rules/test_containers.py @@ -5,7 +5,6 @@ TupleRule, ObjectElemKeyRule, ObjectElemKeyExpressionRule, - ObjectElemKeyDotAccessor, ObjectElemRule, ObjectRule, ) @@ -17,9 +16,6 @@ RSQB, LBRACE, RBRACE, - LPAR, - RPAR, - DOT, EQ, COLON, COMMA, @@ -188,70 +184,28 @@ def test_serialize_string(self): class TestObjectElemKeyExpressionRule(TestCase): def test_lark_name(self): - self.assertEqual( - ObjectElemKeyExpressionRule.lark_name(), "object_elem_key_expression" - ) + self.assertEqual(ObjectElemKeyExpressionRule.lark_name(), "object_elem_key") def test_expression_property(self): - expr = StubExpression("5 + 5") - rule = ObjectElemKeyExpressionRule([LPAR(), expr, RPAR()]) + expr = StubExpression("1 + 1") + rule = ObjectElemKeyExpressionRule([expr]) self.assertIs(rule.expression, expr) - def test_serialize(self): - rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + def test_serialize_bare(self): + rule = ObjectElemKeyExpressionRule([StubExpression("1 + 1")]) result = rule.serialize() - self.assertEqual(result, "${(5 + 5)}") + self.assertEqual(result, "${1 + 1}") def test_serialize_inside_dollar_string(self): - rule = ObjectElemKeyExpressionRule([LPAR(), StubExpression("5 + 5"), RPAR()]) + rule = ObjectElemKeyExpressionRule([StubExpression("1 + 1")]) ctx = SerializationContext(inside_dollar_string=True) result = rule.serialize(context=ctx) - self.assertEqual(result, "(5 + 5)") - - -# --- 
ObjectElemKeyDotAccessor tests --- - - -class TestObjectElemKeyDotAccessor(TestCase): - def test_lark_name(self): - self.assertEqual( - ObjectElemKeyDotAccessor.lark_name(), "object_elem_key_dot_accessor" - ) + self.assertEqual(result, "1 + 1") - def test_identifiers_property(self): - i1 = _make_identifier("k5") - i2 = _make_identifier("attr") - i3 = _make_identifier("sub") - rule = ObjectElemKeyDotAccessor([i1, DOT(), i2, DOT(), i3]) - idents = rule.identifiers - self.assertEqual(len(idents), 3) - self.assertIs(idents[0], i1) - self.assertIs(idents[1], i2) - self.assertIs(idents[2], i3) - - def test_identifiers_two_segments(self): - i1 = _make_identifier("a") - i2 = _make_identifier("b") - rule = ObjectElemKeyDotAccessor([i1, DOT(), i2]) - self.assertEqual(len(rule.identifiers), 2) - - def test_serialize(self): - rule = ObjectElemKeyDotAccessor( - [ - _make_identifier("k5"), - DOT(), - _make_identifier("attr"), - DOT(), - _make_identifier("sub"), - ] - ) - self.assertEqual(rule.serialize(), "k5.attr.sub") - - def test_serialize_two_segments(self): - rule = ObjectElemKeyDotAccessor( - [_make_identifier("a"), DOT(), _make_identifier("b")] - ) - self.assertEqual(rule.serialize(), "a.b") + def test_serialize_function_call(self): + rule = ObjectElemKeyExpressionRule([StubExpression('format("k", v)')]) + result = rule.serialize() + self.assertEqual(result, '${format("k", v)}') # --- ObjectElemRule tests --- diff --git a/test/unit/test_deserializer.py b/test/unit/test_deserializer.py index 41ecd35f..8ae7db67 100644 --- a/test/unit/test_deserializer.py +++ b/test/unit/test_deserializer.py @@ -8,7 +8,6 @@ TupleRule, ObjectRule, ObjectElemRule, - ObjectElemKeyDotAccessor, ObjectElemKeyExpressionRule, ) from hcl2.rules.expressions import ExprTermRule @@ -364,15 +363,19 @@ def test_dotted_key_object_element(self): result = d._deserialize_object_elem("a.b", 1) self.assertIsInstance(result, ObjectElemRule) key_rule = result.key - self.assertIsInstance(key_rule.value, 
ObjectElemKeyDotAccessor) - identifiers = key_rule.value.identifiers - self.assertEqual(len(identifiers), 2) - self.assertEqual(identifiers[0].token.value, "a") - self.assertEqual(identifiers[1].token.value, "b") + self.assertIsInstance(key_rule.value, IdentifierRule) + self.assertEqual(key_rule.value.token.value, "a.b") def test_expression_key_object_element(self): d = _deser() - result = d._deserialize_object_elem("${var.key}", 1) + result = d._deserialize_object_elem("${(var.key)}", 1) + self.assertIsInstance(result, ObjectElemRule) + key_rule = result.key + self.assertIsInstance(key_rule.value, ObjectElemKeyExpressionRule) + + def test_bare_expression_key_object_element(self): + d = _deser() + result = d._deserialize_object_elem("${1 + 1}", 1) self.assertIsInstance(result, ObjectElemRule) key_rule = result.key self.assertIsInstance(key_rule.value, ObjectElemKeyExpressionRule) From a06600262d749d2900ef68edce8127e12284825d Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 22:34:00 +0100 Subject: [PATCH 36/42] `Formatter._vertically_align_object_elems` - fix alignment for expressions and keys --- hcl2/formatter.py | 27 ++++++++++++++++--- .../hcl2_reconstructed/object_keys.tf | 10 +++---- test/integration/hcl2_reconstructed/smoke.tf | 6 ++--- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 8b691c44..1b0702c9 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -10,7 +10,13 @@ AttributeRule, BodyRule, ) -from hcl2.rules.containers import ObjectRule, ObjectElemRule, TupleRule +from hcl2.rules.containers import ( + ObjectRule, + ObjectElemRule, + ObjectElemKeyRule, + ObjectElemKeyExpressionRule, + TupleRule, +) from hcl2.rules.expressions import ExprTermRule from hcl2.rules.for_expressions import ( ForTupleExprRule, @@ -246,9 +252,9 @@ def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): ) def _vertically_align_object_elems(self, rule: ObjectRule): - max_length 
= max(len(elem.key.serialize()) for elem in rule.elements) + max_length = max(self._key_text_width(elem.key) for elem in rule.elements) for elem in rule.elements: - key_length = len(elem.key.serialize()) + key_length = self._key_text_width(elem.key) spaces_to_add = max_length - key_length @@ -258,6 +264,21 @@ def _vertically_align_object_elems(self, rule: ObjectRule): elem.children[1].set_value(" " * spaces_to_add + separator.value) + @staticmethod + def _key_text_width(key: LarkElement) -> int: + """Compute the HCL text width of an object element key.""" + width = len(str(key.serialize())) + # Expression keys serialize with ${...} wrapping (+3 chars vs HCL text). + # Handle both direct ObjectElemKeyExpressionRule (from parser) and + # ObjectElemKeyRule wrapping one (from deserializer). + if isinstance(key, ObjectElemKeyExpressionRule): + width -= 3 + elif isinstance(key, ObjectElemKeyRule) and isinstance( + key.value, ObjectElemKeyExpressionRule + ): + width -= 3 + return width + def _build_newline( self, next_line_indent: int = 0, count: int = 1 ) -> NewLineOrCommentRule: diff --git a/test/integration/hcl2_reconstructed/object_keys.tf b/test/integration/hcl2_reconstructed/object_keys.tf index 6d20581c..002bf6d9 100644 --- a/test/integration/hcl2_reconstructed/object_keys.tf +++ b/test/integration/hcl2_reconstructed/object_keys.tf @@ -2,10 +2,10 @@ bar = { 0 = 0, "foo" = 1, baz = 2, - (var.account) = 3, - (format("key_prefix_%s", local.foo)) = 4, + (var.account) = 3, + (format("key_prefix_%s", local.foo)) = 4, "prefix_${var.account}:${var.user}_suffix" = 5, - 1 + 1 = "two", - (2 + 2) = "four", - format("key_%s", var.name) = "dynamic", + 1 + 1 = "two", + (2 + 2) = "four", + format("key_%s", var.name) = "dynamic", } diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index c0358021..743cf9ac 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -7,9 +7,9 @@ 
block label1 label2 { f = "${"this is a string"}" g = 1 == 2 h = { - k1 = 5, - k2 = 10, - "k3" = { + k1 = 5, + k2 = 10, + "k3" = { k4 = "a", }, (5 + 5) = "d", From deaf0939b199aec1090ad9d89ac5dc87dade9b2c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 23:12:48 +0100 Subject: [PATCH 37/42] remove unused test files --- .../terraform-config-json/backend.json | 40 ------ .../helpers/terraform-config-json/blocks.json | 34 ----- .../terraform-config-json/cloudwatch.json | 28 ---- .../terraform-config-json/data_sources.json | 12 -- .../terraform-config-json/empty-heredoc.json | 1 - .../terraform-config-json/escapes.json | 9 -- test/helpers/terraform-config-json/iam.json | 41 ------ .../locals_embedded_condition.json | 11 -- .../locals_embedded_function.json | 7 - ...locals_embedded_multi_function_nested.json | 8 -- .../multiline_expressions.json | 56 -------- test/helpers/terraform-config-json/nulls.json | 1 - .../provider_function.json | 8 -- .../resource_keyword_attribute.json | 16 --- .../terraform-config-json/route_table.json | 24 ---- test/helpers/terraform-config-json/s3.json | 47 ------- .../string_interpolations.json | 13 -- .../terraform-config-json/test_floats.json | 30 ---- .../unicode_strings.json | 20 --- .../terraform-config-json/variables.json | 117 ---------------- .../terraform-config-json/vars.auto.json | 7 - test/helpers/terraform-config/backend.tf | 31 ----- test/helpers/terraform-config/blocks.tf | 22 --- test/helpers/terraform-config/cloudwatch.tf | 24 ---- test/helpers/terraform-config/data_sources.tf | 8 -- .../terraform-config/empty-heredoc.hcl2 | 2 - test/helpers/terraform-config/escapes.tf | 3 - test/helpers/terraform-config/iam.tf | 37 ----- .../locals_embedded_condition.tf | 7 - .../locals_embedded_function.tf | 3 - .../locals_embedded_multi_function_nested.tf | 6 - .../terraform-config/multiline_expressions.tf | 60 -------- .../terraform-config/provider_function.tf | 4 - .../resource_keyword_attribute.tf | 8 -- 
test/helpers/terraform-config/route_table.tf | 19 --- test/helpers/terraform-config/s3.tf | 36 ----- test/helpers/terraform-config/variables.tf | 129 ------------------ .../helpers/terraform-config/vars.auto.tfvars | 2 - test/helpers/with-meta/data_sources.json | 14 -- test/helpers/with-meta/data_sources.tf | 8 -- 40 files changed, 953 deletions(-) delete mode 100644 test/helpers/terraform-config-json/backend.json delete mode 100644 test/helpers/terraform-config-json/blocks.json delete mode 100644 test/helpers/terraform-config-json/cloudwatch.json delete mode 100644 test/helpers/terraform-config-json/data_sources.json delete mode 100644 test/helpers/terraform-config-json/empty-heredoc.json delete mode 100644 test/helpers/terraform-config-json/escapes.json delete mode 100644 test/helpers/terraform-config-json/iam.json delete mode 100644 test/helpers/terraform-config-json/locals_embedded_condition.json delete mode 100644 test/helpers/terraform-config-json/locals_embedded_function.json delete mode 100644 test/helpers/terraform-config-json/locals_embedded_multi_function_nested.json delete mode 100644 test/helpers/terraform-config-json/multiline_expressions.json delete mode 100644 test/helpers/terraform-config-json/nulls.json delete mode 100644 test/helpers/terraform-config-json/provider_function.json delete mode 100644 test/helpers/terraform-config-json/resource_keyword_attribute.json delete mode 100644 test/helpers/terraform-config-json/route_table.json delete mode 100644 test/helpers/terraform-config-json/s3.json delete mode 100644 test/helpers/terraform-config-json/string_interpolations.json delete mode 100644 test/helpers/terraform-config-json/test_floats.json delete mode 100644 test/helpers/terraform-config-json/unicode_strings.json delete mode 100644 test/helpers/terraform-config-json/variables.json delete mode 100644 test/helpers/terraform-config-json/vars.auto.json delete mode 100644 test/helpers/terraform-config/backend.tf delete mode 100644 
test/helpers/terraform-config/blocks.tf delete mode 100644 test/helpers/terraform-config/cloudwatch.tf delete mode 100644 test/helpers/terraform-config/data_sources.tf delete mode 100644 test/helpers/terraform-config/empty-heredoc.hcl2 delete mode 100644 test/helpers/terraform-config/escapes.tf delete mode 100644 test/helpers/terraform-config/iam.tf delete mode 100644 test/helpers/terraform-config/locals_embedded_condition.tf delete mode 100644 test/helpers/terraform-config/locals_embedded_function.tf delete mode 100644 test/helpers/terraform-config/locals_embedded_multi_function_nested.tf delete mode 100644 test/helpers/terraform-config/multiline_expressions.tf delete mode 100644 test/helpers/terraform-config/provider_function.tf delete mode 100644 test/helpers/terraform-config/resource_keyword_attribute.tf delete mode 100644 test/helpers/terraform-config/route_table.tf delete mode 100644 test/helpers/terraform-config/s3.tf delete mode 100644 test/helpers/terraform-config/variables.tf delete mode 100644 test/helpers/terraform-config/vars.auto.tfvars delete mode 100644 test/helpers/with-meta/data_sources.json delete mode 100644 test/helpers/with-meta/data_sources.tf diff --git a/test/helpers/terraform-config-json/backend.json b/test/helpers/terraform-config-json/backend.json deleted file mode 100644 index 482838c7..00000000 --- a/test/helpers/terraform-config-json/backend.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "provider": [ - { - "aws": { - "region": "${var.region}" - } - }, - { - "aws": { - "region": "${(var.backup_region)}", - "alias": "backup" - } - } - ], - "terraform": [ - { - "required_version": "0.12" - }, - { - "backend": [ - { - "gcs": {} - } - ], - "required_providers": [ - { - "aws": { - "source": "hashicorp/aws" - }, - "null": { - "source": "hashicorp/null" - }, - "template": { - "source": "hashicorp/template" - } - } - ] - } - ] -} diff --git a/test/helpers/terraform-config-json/blocks.json b/test/helpers/terraform-config-json/blocks.json deleted 
file mode 100644 index 716ece56..00000000 --- a/test/helpers/terraform-config-json/blocks.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "block": [ - { - "a": 1 - }, - { - "label": { - "b": 2, - "nested_block_1": [ - { - "a": { - "foo": "bar" - } - }, - { - "a": { - "b": { - "bar": "foo" - } - } - }, - { - "foobar": "barfoo" - } - ], - "nested_block_2": [ - { - "barfoo": "foobar" - } - ] - } - } - ] -} diff --git a/test/helpers/terraform-config-json/cloudwatch.json b/test/helpers/terraform-config-json/cloudwatch.json deleted file mode 100644 index f9dafc99..00000000 --- a/test/helpers/terraform-config-json/cloudwatch.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "resource": [ - { - "aws_cloudwatch_event_rule": { - "aws_cloudwatch_event_rule": { - "name": "name", - "event_pattern": " {\n \"foo\": \"bar\",\n \"foo2\": \"EOF_CONFIG\"\n }" - } - } - }, - { - "aws_cloudwatch_event_rule": { - "aws_cloudwatch_event_rule2": { - "name": "name", - "event_pattern": "{\n \"foo\": \"bar\",\n \"foo2\": \"EOF_CONFIG\"\n}" - } - } - }, - { - "aws_cloudwatch_event_rule": { - "aws_cloudwatch_event_rule2": { - "name": "name", - "event_pattern": "${jsonencode(var.cloudwatch_pattern_deploytool)}" - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/data_sources.json b/test/helpers/terraform-config-json/data_sources.json deleted file mode 100644 index f159c937..00000000 --- a/test/helpers/terraform-config-json/data_sources.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "data": [ - { - "terraform_remote_state": { - "map": { - "for_each": "${{for s3_bucket_key in data.aws_s3_bucket_objects.remote_state_objects.keys : regex(local.remote_state_regex, s3_bucket_key)[\"account_alias\"] => s3_bucket_key if length(regexall(local.remote_state_regex, s3_bucket_key)) > 0}}", - "backend": "s3" - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/empty-heredoc.json b/test/helpers/terraform-config-json/empty-heredoc.json deleted file mode 100644 index c1989c0d..00000000 --- 
a/test/helpers/terraform-config-json/empty-heredoc.json +++ /dev/null @@ -1 +0,0 @@ -{"bar": ""} diff --git a/test/helpers/terraform-config-json/escapes.json b/test/helpers/terraform-config-json/escapes.json deleted file mode 100644 index 41c7d54f..00000000 --- a/test/helpers/terraform-config-json/escapes.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "block": [ - { - "block_with_newlines": { - "a": "line1\nline2" - } - } - ] -} diff --git a/test/helpers/terraform-config-json/iam.json b/test/helpers/terraform-config-json/iam.json deleted file mode 100644 index 8705360e..00000000 --- a/test/helpers/terraform-config-json/iam.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "data": [ - { - "aws_iam_policy_document": { - "policy": { - "statement": [ - { - "effect": "Deny", - "principals": [ - { - "type": "AWS", - "identifiers": [ - "*" - ] - } - ], - "actions": [ - "s3:PutObjectAcl" - ], - "resources": "${aws_s3_bucket.bucket.*.arn.bar}" - } - ] - } - } - }, - { - "aws_iam_policy_document": { - "s3_proxy_policy": { - "statement": [ - { - "actions": [ - "s3:GetObject" - ], - "resources": "${[for bucket_name in local.buckets_to_proxy : \"arn:aws:s3:::${bucket_name}/*\" if substr(bucket_name, 0, 1) == \"l\"]}" - } - ] - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/locals_embedded_condition.json b/test/helpers/terraform-config-json/locals_embedded_condition.json deleted file mode 100644 index 6c41e5e8..00000000 --- a/test/helpers/terraform-config-json/locals_embedded_condition.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "locals": [ - { - "terraform": { - "channels": "${(local.running_in_ci ? 
local.ci_channels : local.local_channels)}", - "authentication": [], - "foo": null - } - } - ] -} diff --git a/test/helpers/terraform-config-json/locals_embedded_function.json b/test/helpers/terraform-config-json/locals_embedded_function.json deleted file mode 100644 index 51cf6454..00000000 --- a/test/helpers/terraform-config-json/locals_embedded_function.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "locals": [ - { - "function_test": "${var.basename}-${var.forwarder_function_name}_${md5(\"${var.vpc_id}${data.aws_region.current.name}\")}" - } - ] -} diff --git a/test/helpers/terraform-config-json/locals_embedded_multi_function_nested.json b/test/helpers/terraform-config-json/locals_embedded_multi_function_nested.json deleted file mode 100644 index f210a087..00000000 --- a/test/helpers/terraform-config-json/locals_embedded_multi_function_nested.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "locals": [ - { - "multi_function": "${substr(split(\"-\", \"us-west-2\")[0], 0, 1)}", - "multi_function_embedded": "${substr(split(\"-\", \"us-west-2\")[0], 0, 1)}" - } - ] -} diff --git a/test/helpers/terraform-config-json/multiline_expressions.json b/test/helpers/terraform-config-json/multiline_expressions.json deleted file mode 100644 index 7f3405c0..00000000 --- a/test/helpers/terraform-config-json/multiline_expressions.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "resource": [ - { - "null_resource": { - "multiline_comment_multiline": { - "triggers": [] - } - } - }, - { - "null_resource": { - "multiline_comment_single_line_before_closing_bracket": { - "triggers": [] - } - } - }, - { - "null_resource": { - "multiline_comment_single_line_between_brackets": { - "triggers": [] - } - } - }, - { - "null_resource": { - "multiline_comment_single_line_after_opening_bracket": { - "triggers": [] - } - } - }, - { - "null_resource": { - "multiline_comment_multiple_single_element": { - "triggers": [ - 2 - ] - } - } - } - ], - "variable": [ - { - "some_var2": { - "description": "description", - "type": 
"string", - "default": "${cidrsubnets(\"10.0.0.0/24\", 2, 2)}" - } - }, - { - "some_var3": { - "description": "description", - "default": "${concat([{\"1\": \"1\"}], [{\"2\": \"2\"}])}" - } - } - ] -} diff --git a/test/helpers/terraform-config-json/nulls.json b/test/helpers/terraform-config-json/nulls.json deleted file mode 100644 index d4a9d448..00000000 --- a/test/helpers/terraform-config-json/nulls.json +++ /dev/null @@ -1 +0,0 @@ -{"terraform": {"unary": "${!null}", "binary": "${(a == null)}", "tuple": [null, 1, 2], "single": null, "conditional": "${null ? null : null}"}} diff --git a/test/helpers/terraform-config-json/provider_function.json b/test/helpers/terraform-config-json/provider_function.json deleted file mode 100644 index 2b749c13..00000000 --- a/test/helpers/terraform-config-json/provider_function.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "locals": [ - { - "name2": "${provider::test2::test(\"a\")}", - "name3": "${test(\"a\")}" - } - ] -} diff --git a/test/helpers/terraform-config-json/resource_keyword_attribute.json b/test/helpers/terraform-config-json/resource_keyword_attribute.json deleted file mode 100644 index 11ff88f9..00000000 --- a/test/helpers/terraform-config-json/resource_keyword_attribute.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "resource": [ - { - "custom_provider_resource": { - "resource_name": { - "name": "resource_name", - "attribute": "attribute_value", - "if" : "attribute_value2", - "in" : "attribute_value3", - "for" : "attribute_value4", - "for_each" : "attribute_value5" - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/route_table.json b/test/helpers/terraform-config-json/route_table.json deleted file mode 100644 index af21a922..00000000 --- a/test/helpers/terraform-config-json/route_table.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "resource": [ - { - "aws_route": { - "tgw": { - "count": "${(var.tgw_name == \"\" ? 
0 : var.number_of_az)}", - "route_table_id": "${aws_route_table.rt[count.index].id}", - "destination_cidr_block": "10.0.0.0/8", - "transit_gateway_id": "${data.aws_ec2_transit_gateway.tgw[0].id}" - } - } - }, - { - "aws_route": { - "tgw-dot-index": { - "count": "${(var.tgw_name == \"\" ? 0 : var.number_of_az)}", - "route_table_id": "${aws_route_table.rt[count.index].id}", - "destination_cidr_block": "10.0.0.0/8", - "transit_gateway_id": "${data.aws_ec2_transit_gateway.tgw[0].id}" - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/s3.json b/test/helpers/terraform-config-json/s3.json deleted file mode 100644 index d3318a21..00000000 --- a/test/helpers/terraform-config-json/s3.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "resource": [ - { - "aws_s3_bucket": { - "name": { - "bucket": "name", - "acl": "log-delivery-write", - "lifecycle_rule": [ - { - "id": "to_glacier", - "prefix": "", - "enabled": true, - "expiration": [ - { - "days": 365 - } - ], - "transition": { - "days": 30, - "storage_class": "GLACIER" - } - } - ], - "versioning": [ - { - "enabled": true - } - ] - } - } - } - ], - "module": [ - { - "bucket_name": { - "source": "s3_bucket_name", - "name": "audit", - "account": "${var.account}", - "region": "${var.region}", - "providers": { - "aws.ue1": "${aws}", - "aws.uw2.attribute": "${aws.backup}" - } - } - } - ] -} diff --git a/test/helpers/terraform-config-json/string_interpolations.json b/test/helpers/terraform-config-json/string_interpolations.json deleted file mode 100644 index 885baf89..00000000 --- a/test/helpers/terraform-config-json/string_interpolations.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "locals": [ - { - "simple_interpolation": "prefix:${var.foo}-suffix", - "embedded_interpolation": "(long substring without interpolation); ${module.special_constants.aws_accounts[\"aaa-${local.foo}-${local.bar}\"]}/us-west-2/key_foo", - "deeply_nested_interpolation": "prefix1-${\"prefix2-${\"prefix3-$${foo:bar}\"}\"}", - "escaped_interpolation": 
"prefix:$${aws:username}-suffix", - "simple_and_escaped": "${\"bar\"}$${baz:bat}", - "simple_and_escaped_reversed": "$${baz:bat}${\"bar\"}", - "nested_escaped": "bar-${\"$${baz:bat}\"}" - } - ] -} diff --git a/test/helpers/terraform-config-json/test_floats.json b/test/helpers/terraform-config-json/test_floats.json deleted file mode 100644 index 87ed65c3..00000000 --- a/test/helpers/terraform-config-json/test_floats.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "locals": [ - { - "simple_float": 123.456, - "small_float": 0.123, - "large_float": 9876543.21, - "negative_float": -42.5, - "negative_small": -0.001, - "scientific_positive": "${1.23e5}", - "scientific_negative": "${9.87e-3}", - "scientific_large": "${6.022e+23}", - "integer_as_float": 100.0, - "float_calculation": "${105e+2 * 3.0 / 2.1}", - "float_comparison": "${5e1 > 2.3 ? 1.0 : 0.0}", - "float_list": [ - 1.1, - 2.2, - 3.3, - -4.4, - "${5.5e2}" - ], - "float_object": { - "pi": 3.14159, - "euler": 2.71828, - "sqrt2": 1.41421, - "scientific": "${-123e+2}" - } - } - ] -} diff --git a/test/helpers/terraform-config-json/unicode_strings.json b/test/helpers/terraform-config-json/unicode_strings.json deleted file mode 100644 index 8eedf932..00000000 --- a/test/helpers/terraform-config-json/unicode_strings.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "locals": [ - { - "basic_unicode": "Hello, 世界! こんにちは Привет नमस्ते", - "unicode_escapes": "© ♥ ♪ ☠ ☺", - "emoji_string": "🚀 🌍 🔥 🎉", - "rtl_text": "English and العربية text mixed", - "complex_unicode": "Python (파이썬) es 很棒的! 
♥ αβγδ", - "ascii": "ASCII: abc123", - "emoji": "Emoji: 🚀🌍🔥🎉", - "math": "Math: ∑∫√∞≠≤≥", - "currency": "Currency: £€¥₹₽₩", - "arrows": "Arrows: ←↑→↓↔↕", - "cjk": "CJK: 你好世界안녕하세요こんにちは", - "cyrillic": "Cyrillic: Привет мир", - "special": "Special: ©®™§¶†‡", - "mixed_content": "Line with interpolation: ${var.name}\nLine with emoji: 👨‍👩‍👧‍👦\nLine with quotes: \"quoted text\"\nLine with backslash: \\escaped" - } - ] -} diff --git a/test/helpers/terraform-config-json/variables.json b/test/helpers/terraform-config-json/variables.json deleted file mode 100644 index d344902c..00000000 --- a/test/helpers/terraform-config-json/variables.json +++ /dev/null @@ -1,117 +0,0 @@ -{ - "variable": [ - { - "region": {} - }, - { - "account": {} - }, - { - "azs": { - "default": { - "us-west-1": "us-west-1c,us-west-1b", - "us-west-2": "us-west-2c,us-west-2b,us-west-2a", - "us-east-1": "us-east-1c,us-east-1b,us-east-1a", - "eu-central-1": "eu-central-1a,eu-central-1b,eu-central-1c", - "sa-east-1": "sa-east-1a,sa-east-1c", - "ap-northeast-1": "ap-northeast-1a,ap-northeast-1c,ap-northeast-1d", - "ap-southeast-1": "ap-southeast-1a,ap-southeast-1b,ap-southeast-1c", - "ap-southeast-2": "ap-southeast-2a,ap-southeast-2b,ap-southeast-2c" - } - } - }, - { - "options": { - "type": "string", - "default": {} - } - }, - { - "var_with_validation": { - "type": "${list(object({\"id\": \"string\", \"nested\": \"${list(object({\"id\": \"string\", \"type\": \"string\"}))}\"}))}", - "validation": [ - { - "condition": "${!contains([for v in flatten(var.var_with_validation[*].id) : can(regex(\"^(A|B)$\", v))], false)}", - "error_message": "The property `id` must be one of value [A, B]." - }, - { - "condition": "${!contains([for v in flatten(var.var_with_validation[*].nested[*].type) : can(regex(\"^(A|B)$\", v))], false)}", - "error_message": "The property `nested.type` must be one of value [A, B]." 
- } - ] - } - } - ], - "locals": [ - { - "foo": "${var.account}_bar", - "bar": { - "baz": 1, - "${(var.account)}": 2, - "${(format(\"key_prefix_%s\", local.foo))}": 3, - "\"prefix_${var.account}:${var.user}_suffix\"": "interpolation" - }, - "tuple": ["${local.foo}"], - "empty_tuple": [] - }, - { - "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {\"aws_account_ids\": \"${[for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}\"} ...}}", - "has_valid_forwarding_rules_template_inputs": "${(length(keys(var.forwarding_rules_template.copy_resolver_rules)) > 0 && length(var.forwarding_rules_template.replace_with_target_ips) > 0 && length(var.forwarding_rules_template.exclude_cidrs) > 0)}", - "for_whitespace": "${{for i in [1, 2, 3] : i => i ...}}" - }, - { - "nested_data": [ - { - "id": 1, - "nested": [ - { - "id": "a", - "again": [ - { - "id": "a1" - }, - { - "id": "b1" - } - ] - }, - { - "id": "c" - } - ] - }, - { - "id": 1, - "nested": [ - { - "id": "a", - "again": [ - { - "id": "a2" - }, - { - "id": "b2" - } - ] - }, - { - "id": "b", - "again": [ - { - "id": "a" - }, - { - "id": "b" - } - ] - } - ] - } - ], - "ids_level_1": "${distinct(local.nested_data[*].id)}", - "ids_level_2": "${flatten(local.nested_data[*].nested[*].id)}", - "ids_level_3": "${flatten(local.nested_data[*].nested[*].again[*][0].foo.bar[0])}", - "bindings_by_role": "${distinct(flatten([for name in local.real_entities : [for role , members in var.bindings : {\"name\": \"${name}\", \"role\": \"${role}\", \"members\": \"${members}\"}]]))}" - } - ] -} diff --git a/test/helpers/terraform-config-json/vars.auto.json b/test/helpers/terraform-config-json/vars.auto.json deleted file mode 100644 index e8ead394..00000000 --- a/test/helpers/terraform-config-json/vars.auto.json +++ 
/dev/null @@ -1,7 +0,0 @@ -{ - "foo": "bar", - "arr": [ - "foo", - "bar" - ] -} diff --git a/test/helpers/terraform-config/backend.tf b/test/helpers/terraform-config/backend.tf deleted file mode 100644 index bd22a869..00000000 --- a/test/helpers/terraform-config/backend.tf +++ /dev/null @@ -1,31 +0,0 @@ -// test new line braces style -provider "aws" -{ - region = var.region -} - -# another comment -provider "aws" { - region = (var.backup_region) - alias = "backup" -} - -/* -one last comment -*/ -terraform { required_version = "0.12" } - -terraform { - backend "gcs" {} - required_providers { - aws = { - source = "hashicorp/aws", - } - null = { - source = "hashicorp/null", - } - template = { - source = "hashicorp/template", - } - } -} diff --git a/test/helpers/terraform-config/blocks.tf b/test/helpers/terraform-config/blocks.tf deleted file mode 100644 index bd8e5159..00000000 --- a/test/helpers/terraform-config/blocks.tf +++ /dev/null @@ -1,22 +0,0 @@ -block { - a = 1 -} - -block "label" { - b = 2 - nested_block_1 "a" { - foo = "bar" - } - - nested_block_1 "a" "b" { - bar = "foo" - } - - nested_block_1 { - foobar = "barfoo" - } - - nested_block_2 { - barfoo = "foobar" - } -} diff --git a/test/helpers/terraform-config/cloudwatch.tf b/test/helpers/terraform-config/cloudwatch.tf deleted file mode 100644 index 8928b810..00000000 --- a/test/helpers/terraform-config/cloudwatch.tf +++ /dev/null @@ -1,24 +0,0 @@ -resource "aws_cloudwatch_event_rule" "aws_cloudwatch_event_rule" { - name = "name" - event_pattern = < s3_bucket_key - if length(regexall(local.remote_state_regex, s3_bucket_key)) > 0 - } - backend = "s3" -} diff --git a/test/helpers/terraform-config/empty-heredoc.hcl2 b/test/helpers/terraform-config/empty-heredoc.hcl2 deleted file mode 100644 index c701dac2..00000000 --- a/test/helpers/terraform-config/empty-heredoc.hcl2 +++ /dev/null @@ -1,2 +0,0 @@ -bar = < { - aws_account_ids = [ - for account_name in var.route53_resolver_forwarding_rule_shares[ - 
forwarding_rule_key - ].aws_account_names : - module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] - ] - } - ... - } - has_valid_forwarding_rules_template_inputs = ( - length(keys(var.forwarding_rules_template.copy_resolver_rules)) > 0 - && length(var.forwarding_rules_template.replace_with_target_ips) > 0 && - length(var.forwarding_rules_template.exclude_cidrs) > 0 - ) - - for_whitespace = { for i in [1, 2, 3] : - i => - i ... - } -} - -locals { - nested_data = [ - { - id = 1, - nested = [ - { - id = "a" - again = [ - { id = "a1" }, - { id = "b1" } - ] - }, - { id = "c" } - ] - }, - { - id = 1 - nested = [ - { - id = "a" - again = [ - { id = "a2" }, - { id = "b2" } - ] - }, - { - id = "b" - again = [ - { id = "a" }, - { id = "b" } - ] - } - ] - } - ] - - ids_level_1 = distinct(local.nested_data[*].id) - ids_level_2 = flatten(local.nested_data[*].nested[*].id) - ids_level_3 = flatten(local.nested_data[*].nested[*].again[*][0].foo.bar[0]) - bindings_by_role = distinct(flatten([ - for name in local.real_entities - : [ - for role, members in var.bindings - : { name = name, role = role, members = members } - ] - ])) -} diff --git a/test/helpers/terraform-config/vars.auto.tfvars b/test/helpers/terraform-config/vars.auto.tfvars deleted file mode 100644 index 9fd3a49d..00000000 --- a/test/helpers/terraform-config/vars.auto.tfvars +++ /dev/null @@ -1,2 +0,0 @@ -foo = "bar" -arr = ["foo", "bar"] diff --git a/test/helpers/with-meta/data_sources.json b/test/helpers/with-meta/data_sources.json deleted file mode 100644 index f04e0ff9..00000000 --- a/test/helpers/with-meta/data_sources.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "data": [ - { - "terraform_remote_state": { - "map": { - "for_each": "${{for s3_bucket_key in data.aws_s3_bucket_objects.remote_state_objects.keys : regex(local.remote_state_regex, s3_bucket_key)[\"account_alias\"] => s3_bucket_key if length(regexall(local.remote_state_regex, s3_bucket_key)) > 0}}", - "backend": "s3", - 
"__start_line__": 1, - "__end_line__": 8 - } - } - } - ] -} diff --git a/test/helpers/with-meta/data_sources.tf b/test/helpers/with-meta/data_sources.tf deleted file mode 100644 index 8e4cc25a..00000000 --- a/test/helpers/with-meta/data_sources.tf +++ /dev/null @@ -1,8 +0,0 @@ -data "terraform_remote_state" "map" { - for_each = { - for s3_bucket_key in data.aws_s3_bucket_objects.remote_state_objects.keys : - regex(local.remote_state_regex, s3_bucket_key)["account_alias"] => s3_bucket_key - if length(regexall(local.remote_state_regex, s3_bucket_key)) > 0 - } - backend = "s3" -} From 4c08d6eb0b04e824097eba39aaea2950177b5a90 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 23:14:28 +0100 Subject: [PATCH 38/42] fix some minor issues; add more cli tests --- hcl2/formatter.py | 3 +- hcl2/rules/base.py | 4 +- hcl2/rules/expressions.py | 8 +++- hcl2/transformer.py | 4 -- hcl2/utils.py | 7 ---- test/unit/cli/test_hcl_to_json.py | 64 ++++++++++++++++++++++++++++++- test/unit/cli/test_json_to_hcl.py | 61 +++++++++++++++++++++++++++++ test/unit/test_api.py | 24 +++++++++++- test/unit/test_utils.py | 7 ---- 9 files changed, 155 insertions(+), 27 deletions(-) diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 1b0702c9..29a9c6f6 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -293,7 +293,8 @@ def _build_newline( return result def _deindent_last_line(self, times: int = 1): - assert self._last_new_line is not None + if self._last_new_line is None: + return token = self._last_new_line.token for _ in range(times): if token.value.endswith(" " * self.options.indent_length): diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index 26a31247..cf129ce1 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -10,7 +10,7 @@ from hcl2.rules.expressions import ExprTermRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule -from hcl2.rules.tokens import NAME, EQ, LBRACE, RBRACE +from hcl2.rules.tokens import EQ, 
LBRACE, RBRACE from hcl2.rules.whitespace import NewLineOrCommentRule from hcl2.utils import SerializationOptions, SerializationContext @@ -148,7 +148,7 @@ def lark_name() -> str: return "block" @property - def labels(self) -> List[NAME]: + def labels(self) -> List[Union[IdentifierRule, StringRule]]: """Return the block label chain (type name, optional string labels).""" return list(filter(lambda label: label is not None, self._labels)) diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py index e6aa1670..c29859a3 100644 --- a/hcl2/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -27,8 +27,8 @@ class ExpressionRule(InlineCommentMixIn, ABC): @staticmethod def lark_name() -> str: - """Return the grammar rule name.""" - return "expression" + """?expression is transparent in Lark — subclasses must override.""" + raise NotImplementedError("ExpressionRule.lark_name() must be overridden") def __init__( self, children, meta: Optional[Meta] = None, parentheses: bool = False @@ -221,6 +221,10 @@ class BinaryOpRule(ExpressionRule): Optional[NewLineOrCommentRule], ] + def __init__(self, children, meta: Optional[Meta] = None): + self._insert_optionals(children, [2]) + super().__init__(children, meta) + @staticmethod def lark_name() -> str: """Return the grammar rule name.""" diff --git a/hcl2/transformer.py b/hcl2/transformer.py index d483cd90..d0a09630 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -217,10 +217,6 @@ def arguments(self, meta: Meta, args) -> ArgumentsRule: def function_call(self, meta: Meta, args) -> FunctionCallRule: return FunctionCallRule(args, meta) - # @v_args(meta=True) - # def provider_function_call(self, meta: Meta, args) -> ProviderFunctionCallRule: - # return ProviderFunctionCallRule(args, meta) - @v_args(meta=True) def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: return IndexExprTermRule(args, meta) diff --git a/hcl2/utils.py b/hcl2/utils.py index 4eb31470..7e349558 100644 --- a/hcl2/utils.py +++ 
b/hcl2/utils.py @@ -2,7 +2,6 @@ import re from contextlib import contextmanager from dataclasses import dataclass, replace -from typing import Generator HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) @@ -33,12 +32,6 @@ def replace(self, **kwargs) -> "SerializationContext": """Return a new context with the given fields overridden.""" return replace(self, **kwargs) - @contextmanager - def copy(self, **kwargs) -> Generator["SerializationContext", None, None]: - """Context manager that yields a modified copy of the context""" - modified_context = self.replace(**kwargs) - yield modified_context - @contextmanager def modify(self, **kwargs): """Context manager that temporarily mutates fields, restoring on exit.""" diff --git a/test/unit/cli/test_hcl_to_json.py b/test/unit/cli/test_hcl_to_json.py index 0b40c896..1615487f 100644 --- a/test/unit/cli/test_hcl_to_json.py +++ b/test/unit/cli/test_hcl_to_json.py @@ -185,8 +185,10 @@ def test_skip_error_with_output_file(self): with patch("sys.argv", ["hcl2tojson", "-s", in_path, out_path]): main() - if os.path.exists(out_path): - self.assertEqual(_read_file(out_path), "") + # The output file is created (opened for writing) before + # conversion; on a skipped error it will be empty. 
+ self.assertTrue(os.path.exists(out_path)) + self.assertEqual(_read_file(out_path), "") def test_raise_error_with_output_file(self): with tempfile.TemporaryDirectory() as tmpdir: @@ -222,6 +224,64 @@ def test_raise_error_to_stdout(self): main() +class TestHclToJsonFlags(TestCase): + def _run_hcl_to_json(self, hcl_content, extra_flags=None): + """Helper: write HCL to a temp file, run main() with flags, return parsed JSON.""" + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, hcl_content) + + stdout = StringIO() + argv = ["hcl2tojson"] + (extra_flags or []) + [hcl_path] + with patch("sys.argv", argv): + with patch("sys.stdout", stdout): + main() + return json.loads(stdout.getvalue()) + + def test_no_explicit_blocks_flag(self): + hcl = 'resource "a" "b" {\n x = 1\n}\n' + default = self._run_hcl_to_json(hcl) + no_blocks = self._run_hcl_to_json(hcl, ["--no-explicit-blocks"]) + # With explicit blocks, the value is wrapped in a list; without, it may differ + self.assertNotEqual(default, no_blocks) + + def test_no_preserve_heredocs_flag(self): + hcl = "x = < Date: Sat, 7 Mar 2026 23:34:47 +0100 Subject: [PATCH 39/42] fix another bunch of issues --- hcl2/deserializer.py | 7 ++-- hcl2/formatter.py | 38 +++++++++++--------- hcl2/reconstructor.py | 18 ---------- hcl2/rules/containers.py | 2 +- hcl2/rules/indexing.py | 8 ++--- hcl2/rules/strings.py | 8 +++-- test/integration/hcl2_reconstructed/smoke.tf | 2 +- test/unit/rules/test_containers.py | 4 ++- test/unit/test_deserializer.py | 4 +-- test/unit/test_formatter.py | 37 +++++++++++++++++++ 10 files changed, 80 insertions(+), 48 deletions(-) diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index ae6cbc15..212dcc6f 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -317,14 +317,17 @@ def _deserialize_object(self, value: dict) -> ObjectRule: return ObjectRule([LBRACE(), *children, RBRACE()]) def _deserialize_object_elem(self, key: Any, value: 
Any) -> ObjectElemRule: + key_rule: Union[ObjectElemKeyExpressionRule, ObjectElemKeyRule] + if self._is_expression(key): expr = self._deserialize_expression(key) - key = ObjectElemKeyExpressionRule([expr]) + key_rule = ObjectElemKeyExpressionRule([expr]) else: key = self._deserialize_text(key) + key_rule = ObjectElemKeyRule([key]) result = [ - ObjectElemKeyRule([key]), + key_rule, COLON() if self.options.object_elements_colon else EQ(), ExprTermRule([self._deserialize(value)]), ] diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 29a9c6f6..c1bac9df 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -3,7 +3,7 @@ from dataclasses import dataclass from typing import List, Optional -from hcl2.rules.abstract import LarkElement +from hcl2.rules.abstract import LarkElement, LarkRule from hcl2.rules.base import ( StartRule, BlockRule, @@ -13,7 +13,6 @@ from hcl2.rules.containers import ( ObjectRule, ObjectElemRule, - ObjectElemKeyRule, ObjectElemKeyExpressionRule, TupleRule, ) @@ -104,7 +103,7 @@ def format_body_rule(self, rule: BodyRule, indent_level: int = 0): if new_children: new_children.pop(-1) - rule._children = new_children + self._set_children(rule, new_children) def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): """Format an attribute rule by formatting its value expression.""" @@ -127,7 +126,7 @@ def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): new_children.append(self._build_newline(indent_level)) self._deindent_last_line() - rule._children = new_children + self._set_children(rule, new_children) def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): """Format an object rule with one element per line and optional alignment.""" @@ -157,7 +156,7 @@ def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): new_children.insert(-1, self._build_newline(indent_level)) self._deindent_last_line() - rule._children = new_children + self._set_children(rule, new_children) if 
self.options.vertically_align_object_elements: self._vertically_align_object_elems(rule) @@ -215,8 +214,10 @@ def format_forobjectexpr( for index in [1, 3]: expression.children[index] = self._build_newline(indent_level) - expression.children[6] = None - expression.children[8] = None + for index in [6, 8]: + child = expression.children[index] + if not isinstance(child, NewLineOrCommentRule) or child.to_list() is None: + expression.children[index] = None if expression.condition is not None: expression.children[10] = self._build_newline(indent_level) @@ -226,6 +227,15 @@ def format_forobjectexpr( expression.children[12] = self._build_newline(indent_level) self._deindent_last_line() + @staticmethod + def _set_children(rule: LarkRule, new_children): + """Replace a rule's children and re-establish parent/index links.""" + rule._children = new_children + for i, child in enumerate(new_children): + if child is not None: + child.set_index(i) + child.set_parent(rule) + def _vertically_align_attributes_in_body(self, body: BodyRule): attributes_sequence: List[AttributeRule] = [] @@ -247,9 +257,8 @@ def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): for attribute in attributes_sequence: name_length = len(attribute.identifier.token.value) spaces_to_add = max_length - name_length - attribute.children[1].set_value( - " " * spaces_to_add + attribute.children[1].value - ) + base = attribute.children[1].value.lstrip(" ") + attribute.children[1].set_value(" " * spaces_to_add + base) def _vertically_align_object_elems(self, rule: ObjectRule): max_length = max(self._key_text_width(elem.key) for elem in rule.elements) @@ -262,21 +271,16 @@ def _vertically_align_object_elems(self, rule: ObjectRule): if isinstance(separator, COLON): # type: ignore[misc] spaces_to_add += 1 - elem.children[1].set_value(" " * spaces_to_add + separator.value) + base = separator.value.lstrip(" ") + elem.children[1].set_value(" " * spaces_to_add + base) @staticmethod def 
_key_text_width(key: LarkElement) -> int: """Compute the HCL text width of an object element key.""" width = len(str(key.serialize())) # Expression keys serialize with ${...} wrapping (+3 chars vs HCL text). - # Handle both direct ObjectElemKeyExpressionRule (from parser) and - # ObjectElemKeyRule wrapping one (from deserializer). if isinstance(key, ObjectElemKeyExpressionRule): width -= 3 - elif isinstance(key, ObjectElemKeyRule) and isinstance( - key.value, ObjectElemKeyExpressionRule - ): - width -= 3 return width def _build_newline( diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index b9f3b3ce..fd437347 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -38,9 +38,6 @@ def __init__(self): self._current_indent = 0 self._last_token_name: Optional[str] = None self._last_rule_name: Optional[str] = None - self._in_parentheses = False - self._in_object = False - self._in_tuple = False def _reset_state(self): """Reset state tracking for formatting decisions.""" @@ -48,9 +45,6 @@ def _reset_state(self): self._current_indent = 0 self._last_token_name = None self._last_rule_name = None - self._in_parentheses = False - self._in_object = False - self._in_tuple = False # pylint:disable=R0911,R0912 def _should_add_space_before( @@ -178,21 +172,9 @@ def _reconstruct_tree( self._last_was_space = True elif rule_name == ExprTermRule.lark_name(): - # Check if parenthesized - if ( - len(tree.children) >= 3 - and isinstance(tree.children[0], Token) - and tree.children[0].type == tokens.LPAR.lark_name() - and isinstance(tree.children[-1], Token) - and tree.children[-1].type == tokens.RPAR.lark_name() - ): - self._in_parentheses = True - for child in tree.children: result.extend(self._reconstruct_node(child, rule_name)) - self._in_parentheses = False - else: for child in tree.children: result.extend(self._reconstruct_node(child, rule_name)) diff --git a/hcl2/rules/containers.py b/hcl2/rules/containers.py index 1ca5f69b..671d98b7 100644 --- 
a/hcl2/rules/containers.py +++ b/hcl2/rules/containers.py @@ -124,7 +124,7 @@ class ObjectElemKeyExpressionRule(LarkRule): @staticmethod def lark_name() -> str: """Return the grammar rule name.""" - return "object_elem_key" + return "object_elem_key_expr" @property def expression(self) -> ExpressionRule: diff --git a/hcl2/rules/indexing.py b/hcl2/rules/indexing.py index 5cfefb96..455ae6ef 100644 --- a/hcl2/rules/indexing.py +++ b/hcl2/rules/indexing.py @@ -47,7 +47,7 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: """Serialize to '.N' string.""" - return f".{self.index.serialize(options)}" + return f".{self.index.serialize(options, context)}" class SqbIndexRule(InlineCommentMixIn): @@ -75,7 +75,7 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: """Serialize to '[expr]' string.""" - return f"[{self.index_expression.serialize(options)}]" + return f"[{self.index_expression.serialize(options, context)}]" def __init__(self, children, meta: Optional[Meta] = None): self._insert_optionals(children, [1, 3]) @@ -97,8 +97,8 @@ def serialize( ) -> Any: """Serialize to 'expr[index]' string.""" with context.modify(inside_dollar_string=True): - expr = self.children[0].serialize(options) - index = self.children[1].serialize(options) + expr = self.children[0].serialize(options, context) + index = self.children[1].serialize(options, context) result = f"{expr}{index}" if not context.inside_dollar_string: result = to_dollar_string(result) diff --git a/hcl2/rules/strings.py b/hcl2/rules/strings.py index c56e6e79..0303adfb 100644 --- a/hcl2/rules/strings.py +++ b/hcl2/rules/strings.py @@ -46,7 +46,7 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: """Serialize to ${expression} string.""" - return to_dollar_string(self.expression.serialize(options)) + return to_dollar_string(self.expression.serialize(options, context)) class 
StringPartRule(LarkRule): @@ -92,7 +92,11 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: """Serialize to a quoted string.""" - return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + return ( + '"' + + "".join(part.serialize(options, context) for part in self.string_parts) + + '"' + ) class HeredocTemplateRule(LarkRule): diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 743cf9ac..d05e4a4f 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -43,7 +43,7 @@ block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : "${forwarding_rule_key}" => { - aws_account_ids = [ + aws_account_ids = [ for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] ] diff --git a/test/unit/rules/test_containers.py b/test/unit/rules/test_containers.py index 0231987d..526b0216 100644 --- a/test/unit/rules/test_containers.py +++ b/test/unit/rules/test_containers.py @@ -184,7 +184,9 @@ def test_serialize_string(self): class TestObjectElemKeyExpressionRule(TestCase): def test_lark_name(self): - self.assertEqual(ObjectElemKeyExpressionRule.lark_name(), "object_elem_key") + self.assertEqual( + ObjectElemKeyExpressionRule.lark_name(), "object_elem_key_expr" + ) def test_expression_property(self): expr = StubExpression("1 + 1") diff --git a/test/unit/test_deserializer.py b/test/unit/test_deserializer.py index 8ae7db67..54ecfa1a 100644 --- a/test/unit/test_deserializer.py +++ b/test/unit/test_deserializer.py @@ -371,14 +371,14 @@ def test_expression_key_object_element(self): result = d._deserialize_object_elem("${(var.key)}", 1) self.assertIsInstance(result, ObjectElemRule) key_rule = result.key - 
self.assertIsInstance(key_rule.value, ObjectElemKeyExpressionRule) + self.assertIsInstance(key_rule, ObjectElemKeyExpressionRule) def test_bare_expression_key_object_element(self): d = _deser() result = d._deserialize_object_elem("${1 + 1}", 1) self.assertIsInstance(result, ObjectElemRule) key_rule = result.key - self.assertIsInstance(key_rule.value, ObjectElemKeyExpressionRule) + self.assertIsInstance(key_rule, ObjectElemKeyExpressionRule) def test_object_elem_value_is_expr_term(self): d = _deser() diff --git a/test/unit/test_formatter.py b/test/unit/test_formatter.py index 0de4eadd..eceb1f65 100644 --- a/test/unit/test_formatter.py +++ b/test/unit/test_formatter.py @@ -790,3 +790,40 @@ def test_for_cond_expression_formatting(self): nlc_count = sum(1 for c in obj._children if isinstance(c, NewLineOrCommentRule)) self.assertGreater(nlc_count, 0) + + +# --- alignment idempotency --- + + +class TestAlignmentIdempotency(TestCase): + """Alignment must not double-pad when applied multiple times (#7).""" + + def test_attribute_alignment_does_not_double_pad(self): + """Running _vertically_align_attributes_in_body twice produces same padding.""" + f = _fmt() + attr_short = _make_attribute("a", "x") + attr_long = _make_attribute("long_name", "y") + body = BodyRule([attr_short, attr_long]) + + f._vertically_align_attributes_in_body(body) + eq_val_first = attr_short.children[1].value + + f._vertically_align_attributes_in_body(body) + eq_val_second = attr_short.children[1].value + + self.assertEqual(eq_val_first, eq_val_second) + + def test_object_elem_alignment_does_not_double_pad(self): + """Running _vertically_align_object_elems twice produces same padding.""" + f = _fmt() + elem_short = _make_object_elem("a", "x") + elem_long = _make_object_elem("long_key", "y") + obj = _make_object([elem_short, elem_long], trailing_commas=False) + + f._vertically_align_object_elems(obj) + sep_val_first = elem_short.children[1].value + + f._vertically_align_object_elems(obj) + 
sep_val_second = elem_short.children[1].value + + self.assertEqual(sep_val_first, sep_val_second) From e893e7d65b79bae79e0ecd22b524ec9068c7761b Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 7 Mar 2026 23:55:17 +0100 Subject: [PATCH 40/42] fix another bunch of issues --- cli/hcl_to_json.py | 1 - hcl2/reconstructor.py | 28 +++++++++++++++--- hcl2/rules/base.py | 2 +- hcl2/rules/for_expressions.py | 3 +- hcl2/rules/indexing.py | 3 +- hcl2/rules/whitespace.py | 7 ++--- test/integration/hcl2_reconstructed/smoke.tf | 4 +-- test/unit/cli/test_hcl_to_json.py | 30 ++++++++++++++++++++ test/unit/rules/test_base.py | 7 +++++ test/unit/rules/test_for_expressions.py | 24 ++++++++++++++++ test/unit/rules/test_whitespace.py | 12 ++++++++ 11 files changed, 107 insertions(+), 14 deletions(-) diff --git a/cli/hcl_to_json.py b/cli/hcl_to_json.py index d4acf0e5..108c7b34 100644 --- a/cli/hcl_to_json.py +++ b/cli/hcl_to_json.py @@ -111,7 +111,6 @@ def main(): def convert(in_file, out_file): _hcl_to_json(in_file, out_file, options, json_indent=json_indent) - out_file.write("\n") if args.PATH == "-": _convert_stdin(convert) diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index fd437347..4760665a 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -4,7 +4,7 @@ from lark import Tree, Token from hcl2.rules import tokens from hcl2.rules.base import BlockRule -from hcl2.rules.for_expressions import ForIntroRule +from hcl2.rules.for_expressions import ForIntroRule, ForTupleExprRule, ForObjectExprRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule from hcl2.rules.expressions import ( @@ -71,9 +71,8 @@ def _should_add_space_before( return True # Space around Conditional Expression operators - if ( - parent_rule_name == ConditionalRule.lark_name() - and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + if parent_rule_name == ConditionalRule.lark_name() and ( + token_type in 
[tokens.COLON.lark_name(), tokens.QMARK.lark_name()] or self._last_token_name in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] ): @@ -151,6 +150,27 @@ def _should_add_space_before( ]: return True + # Space after QMARK/COLON in conditional expressions + if ( + parent_rule_name == ConditionalRule.lark_name() + and self._last_token_name + in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + ): + return True + + # Space after colon in for expressions (before value expression, + # but not before newline/comment which provides its own whitespace) + if ( + self._last_token_name == tokens.COLON.lark_name() + and parent_rule_name + in [ + ForTupleExprRule.lark_name(), + ForObjectExprRule.lark_name(), + ] + and rule_name != "new_line_or_comment" + ): + return True + return False def _reconstruct_tree( diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index cf129ce1..edbca7a7 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -82,7 +82,7 @@ def serialize( result[name].append(child.serialize(options)) if isinstance(child, AttributeRule): - attribute_names.add(child) + attribute_names.add(child.identifier.serialize(options)) result.update(child.serialize(options)) if options.with_comments: # collect in-line comments from attribute assignments, expressions etc diff --git a/hcl2/rules/for_expressions.py b/hcl2/rules/for_expressions.py index a062e66a..eb018343 100644 --- a/hcl2/rules/for_expressions.py +++ b/hcl2/rules/for_expressions.py @@ -1,5 +1,6 @@ """Rule classes for HCL2 for-tuple and for-object expressions.""" +from dataclasses import replace from typing import Any, Tuple, Optional, List from lark.tree import Meta @@ -306,7 +307,7 @@ def serialize( result += f"{self.key_expr.serialize(options, context)} => " result += self.value_expr.serialize( - SerializationOptions(wrap_objects=True), context + replace(options, wrap_objects=True), context ) if self.ellipsis is not None: result += self.ellipsis.serialize(options, context) diff --git 
a/hcl2/rules/indexing.py b/hcl2/rules/indexing.py index 455ae6ef..4cc292c0 100644 --- a/hcl2/rules/indexing.py +++ b/hcl2/rules/indexing.py @@ -13,6 +13,7 @@ LSQB, RSQB, ATTR_SPLAT, + FULL_SPLAT, ) from hcl2.rules.whitespace import ( InlineCommentMixIn, @@ -233,7 +234,7 @@ class FullSplatRule(LarkRule): """Rule for full splat expressions (e.g. [*].attr).""" _children_layout: Tuple[ - ATTR_SPLAT, + FULL_SPLAT, Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], ] diff --git a/hcl2/rules/whitespace.py b/hcl2/rules/whitespace.py index 8591fd20..6b28837e 100644 --- a/hcl2/rules/whitespace.py +++ b/hcl2/rules/whitespace.py @@ -43,12 +43,11 @@ def to_list( comment = comment.strip() for delimiter in ("//", "/*", "#"): - if comment.startswith(delimiter): comment = comment[len(delimiter) :] - - if comment.endswith("*/"): - comment = comment[:-2] + if delimiter == "/*" and comment.endswith("*/"): + comment = comment[:-2] + break if comment != "": result.append(comment.strip()) diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index d05e4a4f..c8529e70 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -41,10 +41,10 @@ block label1 label2 { block { route53_forwarding_rule_shares = { - for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : + for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : "${forwarding_rule_key}" => { aws_account_ids = [ - for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : + for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] ] } ... 
diff --git a/test/unit/cli/test_hcl_to_json.py b/test/unit/cli/test_hcl_to_json.py index 1615487f..5c7e6f3a 100644 --- a/test/unit/cli/test_hcl_to_json.py +++ b/test/unit/cli/test_hcl_to_json.py @@ -49,6 +49,23 @@ def test_single_file_to_output(self): result = json.loads(_read_file(out_path)) self.assertEqual(result["x"], 1) + def test_single_file_to_stdout_single_trailing_newline(self): + with tempfile.TemporaryDirectory() as tmpdir: + hcl_path = os.path.join(tmpdir, "test.tf") + _write_file(hcl_path, SIMPLE_HCL) + + stdout = StringIO() + with patch("sys.argv", ["hcl2tojson", hcl_path]): + with patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + self.assertTrue(output.endswith("\n"), "output should end with newline") + self.assertFalse( + output.endswith("\n\n"), + "output should not have double trailing newline", + ) + def test_stdin(self): stdout = StringIO() stdin = StringIO(SIMPLE_HCL) @@ -59,6 +76,19 @@ def test_stdin(self): result = json.loads(stdout.getvalue()) self.assertEqual(result["x"], 1) + def test_stdin_single_trailing_newline(self): + stdout = StringIO() + stdin = StringIO(SIMPLE_HCL) + with patch("sys.argv", ["hcl2tojson", "-"]): + with patch("sys.stdin", stdin), patch("sys.stdout", stdout): + main() + + output = stdout.getvalue() + self.assertTrue(output.endswith("\n"), "output should end with newline") + self.assertFalse( + output.endswith("\n\n"), "output should not have double trailing newline" + ) + def test_directory_mode(self): with tempfile.TemporaryDirectory() as tmpdir: in_dir = os.path.join(tmpdir, "input") diff --git a/test/unit/rules/test_base.py b/test/unit/rules/test_base.py index bcf240a8..4dc51f92 100644 --- a/test/unit/rules/test_base.py +++ b/test/unit/rules/test_base.py @@ -168,6 +168,13 @@ def test_serialize_bare_newlines_not_collected_as_comments(self): result = body.serialize(options=SerializationOptions(with_comments=True)) self.assertNotIn("__comments__", result) + def 
test_serialize_raises_when_block_name_collides_with_attribute(self): + attr = _make_attribute("resource", "value") + block = _make_block([_make_identifier("resource")]) + body = BodyRule([attr, block]) + with self.assertRaises(RuntimeError): + body.serialize() + def test_serialize_skips_newline_children(self): nlc = _make_nlc("\n") attr = _make_attribute("x", 1) diff --git a/test/unit/rules/test_for_expressions.py b/test/unit/rules/test_for_expressions.py index 0691d81c..38cb90ea 100644 --- a/test/unit/rules/test_for_expressions.py +++ b/test/unit/rules/test_for_expressions.py @@ -34,9 +34,11 @@ class StubExpression(ExpressionRule): def __init__(self, value): self._stub_value = value + self._last_options = None super().__init__([], None) def serialize(self, options=SerializationOptions(), context=SerializationContext()): + self._last_options = options return self._stub_value @@ -390,3 +392,25 @@ def test_serialize_with_condition(self): result = rule.serialize() self.assertIn("if cond", result) self.assertEqual(result, "${{for k, v in items : key => value if cond}}") + + def test_serialize_preserves_caller_options(self): + value_expr = StubExpression("value") + rule = ForObjectExprRule( + [ + LBRACE(), + _make_for_intro_dual("k", "v", "items"), + StubExpression("key"), + FOR_OBJECT_ARROW(), + value_expr, + RBRACE(), + ] + ) + caller_options = SerializationOptions( + with_comments=True, preserve_heredocs=False + ) + rule.serialize(options=caller_options) + # value_expr should receive options with wrap_objects=True but + # all other caller settings preserved + self.assertTrue(value_expr._last_options.wrap_objects) + self.assertTrue(value_expr._last_options.with_comments) + self.assertFalse(value_expr._last_options.preserve_heredocs) diff --git a/test/unit/rules/test_whitespace.py b/test/unit/rules/test_whitespace.py index 351ca82c..49fde824 100644 --- a/test/unit/rules/test_whitespace.py +++ b/test/unit/rules/test_whitespace.py @@ -61,6 +61,18 @@ def 
test_to_list_block_comment(self): result = rule.to_list() self.assertEqual(result, ["block comment"]) + def test_to_list_line_comment_ending_in_block_close(self): + """A // comment ending in */ should preserve the */ suffix.""" + rule = _make_nlc("// comment ending in */\n") + result = rule.to_list() + self.assertEqual(result, ["comment ending in */"]) + + def test_to_list_hash_comment_ending_in_block_close(self): + """A # comment ending in */ should preserve the */ suffix.""" + rule = _make_nlc("# comment ending in */\n") + result = rule.to_list() + self.assertEqual(result, ["comment ending in */"]) + def test_to_list_multiple_comments(self): rule = _make_nlc("// first\n// second\n") result = rule.to_list() From 0c8a1c327d590c61b55380dcc260800d03fadf6b Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 8 Mar 2026 00:22:54 +0100 Subject: [PATCH 41/42] fix another bunch of issues --- cli/hcl_to_json.py | 2 +- cli/helpers.py | 4 ++++ cli/json_to_hcl.py | 2 +- hcl2/const.py | 1 + hcl2/deserializer.py | 4 ++-- hcl2/rules/base.py | 4 ++-- hcl2/rules/functions.py | 20 ----------------- hcl2/rules/strings.py | 2 +- test/unit/cli/test_hcl_to_json.py | 6 ++--- test/unit/rules/test_functions.py | 37 ------------------------------- test/unit/rules/test_strings.py | 2 +- test/unit/test_deserializer.py | 6 ++++- 12 files changed, 20 insertions(+), 70 deletions(-) diff --git a/cli/hcl_to_json.py b/cli/hcl_to_json.py index 108c7b34..7e9f7275 100644 --- a/cli/hcl_to_json.py +++ b/cli/hcl_to_json.py @@ -129,4 +129,4 @@ def convert(in_file, out_file): out_extension=".json", ) else: - raise RuntimeError("Invalid Path", args.PATH) + raise RuntimeError(f"Invalid Path: {args.PATH}") diff --git a/cli/helpers.py b/cli/helpers.py index 6d463d45..b7d48376 100644 --- a/cli/helpers.py +++ b/cli/helpers.py @@ -26,6 +26,8 @@ def _convert_single_file( convert_fn(in_file, out_file) except skippable: if skip: + if os.path.exists(out_path): + os.remove(out_path) return raise else: @@ -83,6 +85,8 
@@ def _convert_directory( convert_fn(in_file, out_file) except skippable: if skip: + if os.path.exists(out_file_path): + os.remove(out_file_path) continue raise diff --git a/cli/json_to_hcl.py b/cli/json_to_hcl.py index 65caeb09..826b7796 100644 --- a/cli/json_to_hcl.py +++ b/cli/json_to_hcl.py @@ -133,4 +133,4 @@ def convert(in_file, out_file): out_extension=".tf", ) else: - raise RuntimeError("Invalid Path", args.PATH) + raise RuntimeError(f"Invalid Path: {args.PATH}") diff --git a/hcl2/const.py b/hcl2/const.py index c36a5321..555c56aa 100644 --- a/hcl2/const.py +++ b/hcl2/const.py @@ -2,3 +2,4 @@ IS_BLOCK = "__is_block__" COMMENTS_KEY = "__comments__" +INLINE_COMMENTS_KEY = "__inline_comments__" diff --git a/hcl2/deserializer.py b/hcl2/deserializer.py index 212dcc6f..5043985a 100644 --- a/hcl2/deserializer.py +++ b/hcl2/deserializer.py @@ -9,7 +9,7 @@ from regex import regex from hcl2.parser import parser as _get_parser -from hcl2.const import IS_BLOCK, COMMENTS_KEY +from hcl2.const import IS_BLOCK, COMMENTS_KEY, INLINE_COMMENTS_KEY from hcl2.rules.abstract import LarkElement, LarkRule from hcl2.rules.base import ( BlockRule, @@ -336,7 +336,7 @@ def _deserialize_object_elem(self, key: Any, value: Any) -> ObjectElemRule: def _is_reserved_key(self, key: str) -> bool: """Check if a key is a reserved metadata key that should be skipped during deserialization.""" - return key in (IS_BLOCK, COMMENTS_KEY) + return key in (IS_BLOCK, COMMENTS_KEY, INLINE_COMMENTS_KEY) def _is_expression(self, value: Any) -> bool: return isinstance(value, str) and value.startswith("${") and value.endswith("}") diff --git a/hcl2/rules/base.py b/hcl2/rules/base.py index edbca7a7..540d6284 100644 --- a/hcl2/rules/base.py +++ b/hcl2/rules/base.py @@ -5,7 +5,7 @@ from lark.tree import Meta -from hcl2.const import IS_BLOCK +from hcl2.const import IS_BLOCK, INLINE_COMMENTS_KEY from hcl2.rules.abstract import LarkRule, LarkToken from hcl2.rules.expressions import ExprTermRule from 
hcl2.rules.literal_rules import IdentifierRule @@ -97,7 +97,7 @@ def serialize( if comments: result["__comments__"] = comments if inline_comments: - result["__inline_comments__"] = inline_comments + result[INLINE_COMMENTS_KEY] = inline_comments return result diff --git a/hcl2/rules/functions.py b/hcl2/rules/functions.py index e9722965..bd574ebe 100644 --- a/hcl2/rules/functions.py +++ b/hcl2/rules/functions.py @@ -111,23 +111,3 @@ def serialize( result = to_dollar_string(result) return result - - -class ProviderFunctionCallRule(FunctionCallRule): - """Rule for provider-namespaced function calls.""" - - _children_layout: Tuple[ - IdentifierRule, - IdentifierRule, - IdentifierRule, - LPAR, - Optional[NewLineOrCommentRule], - Optional[ArgumentsRule], - Optional[NewLineOrCommentRule], - RPAR, - ] - - @staticmethod - def lark_name() -> str: - """Return the grammar rule name.""" - return "provider_function_call" diff --git a/hcl2/rules/strings.py b/hcl2/rules/strings.py index 0303adfb..2a19a0f9 100644 --- a/hcl2/rules/strings.py +++ b/hcl2/rules/strings.py @@ -143,7 +143,7 @@ class HeredocTrimTemplateRule(HeredocTemplateRule): @staticmethod def lark_name() -> str: """Return the grammar rule name.""" - return "heredoc_trim_template" + return "heredoc_template_trim" def serialize( self, options=SerializationOptions(), context=SerializationContext() diff --git a/test/unit/cli/test_hcl_to_json.py b/test/unit/cli/test_hcl_to_json.py index 5c7e6f3a..4954d09c 100644 --- a/test/unit/cli/test_hcl_to_json.py +++ b/test/unit/cli/test_hcl_to_json.py @@ -215,10 +215,8 @@ def test_skip_error_with_output_file(self): with patch("sys.argv", ["hcl2tojson", "-s", in_path, out_path]): main() - # The output file is created (opened for writing) before - # conversion; on a skipped error it will be empty. - self.assertTrue(os.path.exists(out_path)) - self.assertEqual(_read_file(out_path), "") + # The partial output file is cleaned up on skipped errors. 
+ self.assertFalse(os.path.exists(out_path)) def test_raise_error_with_output_file(self): with tempfile.TemporaryDirectory() as tmpdir: diff --git a/test/unit/rules/test_functions.py b/test/unit/rules/test_functions.py index 18a763bd..6d3146c0 100644 --- a/test/unit/rules/test_functions.py +++ b/test/unit/rules/test_functions.py @@ -5,7 +5,6 @@ from hcl2.rules.functions import ( ArgumentsRule, FunctionCallRule, - ProviderFunctionCallRule, ) from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.tokens import NAME, COMMA, ELLIPSIS, LPAR, RPAR, StringToken @@ -145,39 +144,3 @@ def test_arguments_with_colons_tokens(self): rule = FunctionCallRule(children) self.assertIsNotNone(rule.arguments) self.assertEqual(rule.serialize(), "${provider::func::aa(5)}") - - -# --- ProviderFunctionCallRule tests --- - - -class TestProviderFunctionCallRule(TestCase): - def test_lark_name(self): - self.assertEqual(ProviderFunctionCallRule.lark_name(), "provider_function_call") - - def test_inherits_function_call_rule(self): - self.assertTrue(issubclass(ProviderFunctionCallRule, FunctionCallRule)) - - def test_serialize_provider_function(self): - children = [ - _make_identifier("ns"), - _make_identifier("mod"), - _make_identifier("func"), - LPAR(), - _make_arguments(["a"]), - RPAR(), - ] - rule = ProviderFunctionCallRule(children) - self.assertEqual(rule.serialize(), "${ns::mod::func(a)}") - - def test_serialize_inside_dollar_string(self): - children = [ - _make_identifier("ns"), - _make_identifier("mod"), - _make_identifier("func"), - LPAR(), - _make_arguments(["a"]), - RPAR(), - ] - rule = ProviderFunctionCallRule(children) - ctx = SerializationContext(inside_dollar_string=True) - self.assertEqual(rule.serialize(context=ctx), "ns::mod::func(a)") diff --git a/test/unit/rules/test_strings.py b/test/unit/rules/test_strings.py index 9abf7eb6..b037d997 100644 --- a/test/unit/rules/test_strings.py +++ b/test/unit/rules/test_strings.py @@ -243,7 +243,7 @@ def 
test_serialize_no_preserve_invalid_raises(self): class TestHeredocTrimTemplateRule(TestCase): def test_lark_name(self): - self.assertEqual(HeredocTrimTemplateRule.lark_name(), "heredoc_trim_template") + self.assertEqual(HeredocTrimTemplateRule.lark_name(), "heredoc_template_trim") def test_serialize_preserve_heredocs_trims_indent(self): token = HEREDOC_TRIM_TEMPLATE("<<-EOF\n line1\n line2\nEOF") diff --git a/test/unit/test_deserializer.py b/test/unit/test_deserializer.py index 54ecfa1a..5ec25fe9 100644 --- a/test/unit/test_deserializer.py +++ b/test/unit/test_deserializer.py @@ -1,7 +1,7 @@ # pylint: disable=C0103,C0114,C0115,C0116 from unittest import TestCase -from hcl2.const import IS_BLOCK, COMMENTS_KEY +from hcl2.const import IS_BLOCK, COMMENTS_KEY, INLINE_COMMENTS_KEY from hcl2.deserializer import BaseDeserializer, DeserializerOptions from hcl2.rules.base import StartRule, BodyRule, BlockRule, AttributeRule from hcl2.rules.containers import ( @@ -467,6 +467,10 @@ def test_is_reserved_key_comments(self): d = _deser() self.assertTrue(d._is_reserved_key(COMMENTS_KEY)) + def test_is_reserved_key_inline_comments(self): + d = _deser() + self.assertTrue(d._is_reserved_key(INLINE_COMMENTS_KEY)) + def test_is_reserved_key_normal_key(self): d = _deser() self.assertFalse(d._is_reserved_key("name")) From 5ce94f810d09d1ffb031d3b80216948a4c4b726d Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sun, 8 Mar 2026 00:46:43 +0100 Subject: [PATCH 42/42] increase minimum test coverage --- .coveragerc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index 89ef860b..6e581776 100644 --- a/.coveragerc +++ b/.coveragerc @@ -10,6 +10,6 @@ omit = [report] show_missing = true -fail_under = 90 +fail_under = 95 exclude_lines = raise NotImplementedError