diff --git a/CLAUDE.md b/CLAUDE.md index 833629f0..6823678a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -131,6 +131,14 @@ Use concrete stubs when testing ABCs (e.g., `StubExpression(ExpressionRule)`). Always run round-trip full test suite after any modification. +## Pre-commit Checks + +Hooks are defined in `.pre-commit-config.yaml` (includes black, mypy, pylint, and others). All changed files must pass these checks before committing. When writing or modifying code: + +- Format Python with **black** (Python 3.8 target). +- Ensure **mypy** and **pylint** pass. Pylint config is in `pylintrc`, scoped to `hcl2/` and `test/`. +- End files with a newline; strip trailing whitespace (except under `test/integration/(hcl2_reconstructed|specialized)/`). + ## Keeping Docs Current Update this file when architecture, modules, API surface, or testing conventions change. Also update `README.md` and `docs/usage.md` when changes affect the public API, CLI flags, or option fields. diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 0e66627a..f45a37ff 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -280,8 +280,8 @@ def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): for attribute in attributes_sequence: name_length = len(attribute.identifier.token.value) spaces_to_add = max_length - name_length - base = attribute.children[1].value.lstrip(" ") - attribute.children[1].set_value(" " * spaces_to_add + base) + base = attribute.children[1].value.lstrip(" \t") + attribute.children[1].set_value(" " * (spaces_to_add + 1) + base) def _vertically_align_object_elems(self, rule: ObjectRule): max_length = max(self._key_text_width(elem.key) for elem in rule.elements) @@ -294,8 +294,12 @@ def _vertically_align_object_elems(self, rule: ObjectRule): if isinstance(separator, COLON): # type: ignore[misc] spaces_to_add += 1 - base = separator.value.lstrip(" ") - elem.children[1].set_value(" " * spaces_to_add + base) + base = separator.value.lstrip(" \t") + # EQ gets +1 for the base space (reconstructor no longer adds it + # when the token already has leading whitespace); COLON already + # has its own +1 above and the reconstructor doesn't add space. + extra = 1 if not isinstance(separator, COLON) else 0 # type: ignore[misc] + elem.children[1].set_value(" " * (spaces_to_add + extra) + base) @staticmethod def _key_text_width(key: LarkElement) -> int: diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index f0248290..a5c5fba6 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -50,7 +50,7 @@ RSQB : "]" COMMA : "," DOT : "." EQ : /[ \t]*=(?!=|>)/ -COLON : ":" +COLON : /[ \t]*:(?!:)/ DBLQUOTE : "\"" // Interpolation diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index cd1538ec..2a9ca708 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -4,6 +4,7 @@ from lark import Tree, Token from hcl2.rules import tokens from hcl2.rules.base import BlockRule +from hcl2.rules.containers import ObjectElemRule from hcl2.rules.for_expressions import ForIntroRule, ForTupleExprRule, ForObjectExprRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule @@ -76,14 +77,20 @@ def _should_add_space_before( or self._last_token_name in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] ): + # COLON may already carry leading whitespace from the grammar + if token_type == tokens.COLON.lark_name() and str( + current_node + ).startswith((" ", "\t")): + return False return True - # Space after + # Space before colon in for_intro if ( parent_rule_name == ForIntroRule.lark_name() and token_type == tokens.COLON.lark_name() ): - + if str(current_node).startswith((" ", "\t")): + return False return True # Space after commas in tuples and function arguments... @@ -120,10 +127,28 @@ def _should_add_space_before( return True # Space after ellipsis in function arguments + # ... except before newlines which provide their own whitespace if self._last_token_name == tokens.ELLIPSIS.lark_name(): + if token_type == "NL_OR_COMMENT": + return False return True - if tokens.EQ.lark_name() in [token_type, self._last_token_name]: + # Space around EQ and COLON separators in attributes/object elements. + # Both terminals may carry leading whitespace from the original + # source (e.g. " =" for aligned attributes, " :" for object + # elements). Skip the automatic space when the token already + # provides it. COLON only gets space if it already has leading + # whitespace (unlike EQ which always gets at least one space). + if token_type == tokens.EQ.lark_name(): + if str(current_node).startswith((" ", "\t")): + return False + return True + if token_type == tokens.COLON.lark_name(): + return False + if self._last_token_name == tokens.EQ.lark_name(): + # Don't add space before newlines which provide their own whitespace + if token_type == "NL_OR_COMMENT": + return False return True # Don't add space around operator tokens inside unary_op @@ -158,14 +183,16 @@ def _should_add_space_before( ): return True - # Space after colon in for expressions (before value expression, - # but not before newline/comment which provides its own whitespace) + # Space after colon in for expressions and object elements + # (before value expression, but not before newline/comment + # which provides its own whitespace) if ( self._last_token_name == tokens.COLON.lark_name() and parent_rule_name in [ ForTupleExprRule.lark_name(), ForObjectExprRule.lark_name(), + ObjectElemRule.lark_name(), ] and rule_name != "new_line_or_comment" ): @@ -253,6 +280,14 @@ def reconstruct(self, tree: Tree, postproc=None) -> str: if postproc: result = postproc(result) + # The grammar's body rule ends with an optional new_line_or_comment + # which captures the final newline. The parser often produces two + # NL_OR_COMMENT tokens for a single trailing newline (the statement + # separator plus the EOF newline), resulting in a spurious blank line. + # Strip exactly one trailing newline when there are two or more. + if result.endswith("\n\n"): + result = result[:-1] + # Ensure file ends with newline if result and not result.endswith("\n"): result += "\n" diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py index 6636d4c8..f29eea5a 100644 --- a/hcl2/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -248,7 +248,9 @@ def serialize( """Serialize to 'lhs operator rhs' string.""" with context.modify(inside_dollar_string=True): lhs = self.expr_term.serialize(options, context) - operator = self.binary_term.binary_operator.serialize(options, context) + operator = str( + self.binary_term.binary_operator.serialize(options, context) + ).strip() rhs = self.binary_term.expr_term.serialize(options, context) result = f"{lhs} {operator} {rhs}" @@ -286,7 +288,8 @@ def serialize( ) -> Any: """Serialize to 'operator operand' string.""" with context.modify(inside_dollar_string=True): - result = f"{self.operator}{self.expr_term.serialize(options, context)}" + operator = self.operator.rstrip() + result = f"{operator}{self.expr_term.serialize(options, context)}" if not context.inside_dollar_string: result = to_dollar_string(result) diff --git a/hcl2/transformer.py b/hcl2/transformer.py index f6e94004..7b067462 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -83,10 +83,10 @@ def __default_token__(self, token: Token) -> StringToken: # TODO make this return StaticStringToken where applicable value = token.value # The EQ terminal /[ \t]*=(?!=|>)/ captures leading whitespace. - # Strip it so parsed and deserialized tokens produce the same "=" value, - # preventing the reconstructor from emitting double spaces. - if token.type == "EQ": - value = value.lstrip(" \t") + # Preserve it so the direct pipeline (to_lark → reconstruct) retains + # original alignment. The reconstructor skips its own space insertion + # when the EQ token already carries leading whitespace. + if value in StaticStringToken.classes_by_value: return StaticStringToken.classes_by_value[value]() return StringToken[token.type](value) # type: ignore[misc] diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index 32e22466..e2cb9bb8 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -3,7 +3,7 @@ block label1 label2 { a = 5 b = 1256.5 c = 15 + (10 * 12) - d = (- a) + d = (-a) e = ( a == b ? true : false diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 655ee440..54b3286c 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -61,7 +61,7 @@ block { for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] ] - } ... + } ... if substr(bucket_name, 0, 1) == "l" } } diff --git a/test/integration/test_round_trip.py b/test/integration/test_round_trip.py index 67217f07..b78141cb 100644 --- a/test/integration/test_round_trip.py +++ b/test/integration/test_round_trip.py @@ -79,6 +79,14 @@ def _parse_and_serialize(hcl_text: str, options=None) -> dict: return rules.serialize() +def _direct_reconstruct(hcl_text: str) -> str: + """Parse HCL text, transform to IR, convert to Lark tree, and reconstruct.""" + parsed_tree = parses_to_tree(hcl_text) + rules = RuleTransformer().transform(parsed_tree) + lark_tree = rules.to_lark() + return HCLReconstructor().reconstruct(lark_tree) + + def _deserialize_and_reserialize(serialized: dict) -> dict: """Deserialize a Python dict back through the rule tree and reserialize.""" deserializer = BaseDeserializer() @@ -120,6 +128,33 @@ def test_hcl_to_json(self): ) +class TestDirectReconstruction(TestCase): + """Test HCL2 → IR → HCL2 direct pipeline. + + Parse HCL, transform to IR, convert directly to Lark tree (skipping + serialization to dict), reconstruct HCL, then verify the result + re-parses to the same JSON as the original. + """ + + maxDiff = None + + def test_direct_reconstruct(self): + for suite in _get_suites(): + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + original_hcl = hcl_path.read_text() + + # Direct: HCL → IR → Lark → HCL + reconstructed_hcl = _direct_reconstruct(original_hcl) + + self.assertMultiLineEqual( + reconstructed_hcl, + original_hcl, + f"Direct reconstruction mismatch for suite {suite}: " + f"HCL → IR → HCL did not match original HCL", + ) + + class TestRoundTripReserialization(TestCase): """Test JSON → JSON reserialization.