Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,14 @@ Use concrete stubs when testing ABCs (e.g., `StubExpression(ExpressionRule)`).

Always run the full round-trip test suite after any modification.

## Pre-commit Checks

Hooks are defined in `.pre-commit-config.yaml` (includes black, mypy, pylint, and others). All changed files must pass these checks before committing. When writing or modifying code:

- Format Python with **black** (Python 3.8 target).
- Ensure **mypy** and **pylint** pass. Pylint config is in `pylintrc`, scoped to `hcl2/` and `test/`.
- End files with a newline; strip trailing whitespace (except under `test/integration/(hcl2_reconstructed|specialized)/`).

## Keeping Docs Current

Update this file when architecture, modules, API surface, or testing conventions change. Also update `README.md` and `docs/usage.md` when changes affect the public API, CLI flags, or option fields.
12 changes: 8 additions & 4 deletions hcl2/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,8 @@ def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]):
for attribute in attributes_sequence:
name_length = len(attribute.identifier.token.value)
spaces_to_add = max_length - name_length
base = attribute.children[1].value.lstrip(" ")
attribute.children[1].set_value(" " * spaces_to_add + base)
base = attribute.children[1].value.lstrip(" \t")
attribute.children[1].set_value(" " * (spaces_to_add + 1) + base)

def _vertically_align_object_elems(self, rule: ObjectRule):
max_length = max(self._key_text_width(elem.key) for elem in rule.elements)
Expand All @@ -294,8 +294,12 @@ def _vertically_align_object_elems(self, rule: ObjectRule):
if isinstance(separator, COLON): # type: ignore[misc]
spaces_to_add += 1

base = separator.value.lstrip(" ")
elem.children[1].set_value(" " * spaces_to_add + base)
base = separator.value.lstrip(" \t")
# EQ gets +1 for the base space (reconstructor no longer adds it
# when the token already has leading whitespace); COLON already
# has its own +1 above and the reconstructor doesn't add space.
extra = 1 if not isinstance(separator, COLON) else 0 # type: ignore[misc]
elem.children[1].set_value(" " * (spaces_to_add + extra) + base)

@staticmethod
def _key_text_width(key: LarkElement) -> int:
Expand Down
2 changes: 1 addition & 1 deletion hcl2/hcl2.lark
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ RSQB : "]"
COMMA : ","
DOT : "."
EQ : /[ \t]*=(?!=|>)/
COLON : ":"
COLON : /[ \t]*:(?!:)/
DBLQUOTE : "\""

// Interpolation
Expand Down
45 changes: 40 additions & 5 deletions hcl2/reconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from lark import Tree, Token
from hcl2.rules import tokens
from hcl2.rules.base import BlockRule
from hcl2.rules.containers import ObjectElemRule
from hcl2.rules.for_expressions import ForIntroRule, ForTupleExprRule, ForObjectExprRule
from hcl2.rules.literal_rules import IdentifierRule
from hcl2.rules.strings import StringRule
Expand Down Expand Up @@ -76,14 +77,20 @@ def _should_add_space_before(
or self._last_token_name
in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()]
):
# COLON may already carry leading whitespace from the grammar
if token_type == tokens.COLON.lark_name() and str(
current_node
).startswith((" ", "\t")):
return False
return True

# Space after
# Space before colon in for_intro
if (
parent_rule_name == ForIntroRule.lark_name()
and token_type == tokens.COLON.lark_name()
):

if str(current_node).startswith((" ", "\t")):
return False
return True

# Space after commas in tuples and function arguments...
Expand Down Expand Up @@ -120,10 +127,28 @@ def _should_add_space_before(
return True

# Space after ellipsis in function arguments
# ... except before newlines which provide their own whitespace
if self._last_token_name == tokens.ELLIPSIS.lark_name():
if token_type == "NL_OR_COMMENT":
return False
return True

if tokens.EQ.lark_name() in [token_type, self._last_token_name]:
# Space around EQ and COLON separators in attributes/object elements.
# Both terminals may carry leading whitespace from the original
# source (e.g. " =" for aligned attributes, " :" for object
# elements). Skip the automatic space when the token already
# provides it. COLON only gets space if it already has leading
# whitespace (unlike EQ which always gets at least one space).
if token_type == tokens.EQ.lark_name():
if str(current_node).startswith((" ", "\t")):
return False
return True
if token_type == tokens.COLON.lark_name():
return False
if self._last_token_name == tokens.EQ.lark_name():
# Don't add space before newlines which provide their own whitespace
if token_type == "NL_OR_COMMENT":
return False
return True

# Don't add space around operator tokens inside unary_op
Expand Down Expand Up @@ -158,14 +183,16 @@ def _should_add_space_before(
):
return True

# Space after colon in for expressions (before value expression,
# but not before newline/comment which provides its own whitespace)
# Space after colon in for expressions and object elements
# (before value expression, but not before newline/comment
# which provides its own whitespace)
if (
self._last_token_name == tokens.COLON.lark_name()
and parent_rule_name
in [
ForTupleExprRule.lark_name(),
ForObjectExprRule.lark_name(),
ObjectElemRule.lark_name(),
]
and rule_name != "new_line_or_comment"
):
Expand Down Expand Up @@ -253,6 +280,14 @@ def reconstruct(self, tree: Tree, postproc=None) -> str:
if postproc:
result = postproc(result)

# The grammar's body rule ends with an optional new_line_or_comment
# which captures the final newline. The parser often produces two
# NL_OR_COMMENT tokens for a single trailing newline (the statement
# separator plus the EOF newline), resulting in a spurious blank line.
# Strip exactly one trailing newline when there are two or more.
if result.endswith("\n\n"):
result = result[:-1]

# Ensure file ends with newline
if result and not result.endswith("\n"):
result += "\n"
Expand Down
7 changes: 5 additions & 2 deletions hcl2/rules/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,9 @@ def serialize(
"""Serialize to 'lhs operator rhs' string."""
with context.modify(inside_dollar_string=True):
lhs = self.expr_term.serialize(options, context)
operator = self.binary_term.binary_operator.serialize(options, context)
operator = str(
self.binary_term.binary_operator.serialize(options, context)
).strip()
rhs = self.binary_term.expr_term.serialize(options, context)

result = f"{lhs} {operator} {rhs}"
Expand Down Expand Up @@ -286,7 +288,8 @@ def serialize(
) -> Any:
"""Serialize to 'operator operand' string."""
with context.modify(inside_dollar_string=True):
result = f"{self.operator}{self.expr_term.serialize(options, context)}"
operator = self.operator.rstrip()
result = f"{operator}{self.expr_term.serialize(options, context)}"

if not context.inside_dollar_string:
result = to_dollar_string(result)
Expand Down
8 changes: 4 additions & 4 deletions hcl2/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ def __default_token__(self, token: Token) -> StringToken:
# TODO make this return StaticStringToken where applicable
value = token.value
# The EQ terminal /[ \t]*=(?!=|>)/ captures leading whitespace.
# Strip it so parsed and deserialized tokens produce the same "=" value,
# preventing the reconstructor from emitting double spaces.
if token.type == "EQ":
value = value.lstrip(" \t")
# Preserve it so the direct pipeline (to_lark → reconstruct) retains
# original alignment. The reconstructor skips its own space insertion
# when the EQ token already carries leading whitespace.

if value in StaticStringToken.classes_by_value:
return StaticStringToken.classes_by_value[value]()
return StringToken[token.type](value) # type: ignore[misc]
Expand Down
2 changes: 1 addition & 1 deletion test/integration/hcl2_original/smoke.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ block label1 label2 {
a = 5
b = 1256.5
c = 15 + (10 * 12)
d = (- a)
d = (-a)
e = (
a == b
? true : false
Expand Down
2 changes: 1 addition & 1 deletion test/integration/hcl2_reconstructed/smoke.tf
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ block {
for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names :
module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"]
]
} ...
} ...
if substr(bucket_name, 0, 1) == "l"
}
}
35 changes: 35 additions & 0 deletions test/integration/test_round_trip.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ def _parse_and_serialize(hcl_text: str, options=None) -> dict:
return rules.serialize()


def _direct_reconstruct(hcl_text: str) -> str:
    """Run the direct pipeline: parse HCL, build the IR, and reconstruct.

    The IR is converted straight to a Lark tree (no dict serialization
    step) before reconstruction.
    """
    rule_tree = RuleTransformer().transform(parses_to_tree(hcl_text))
    return HCLReconstructor().reconstruct(rule_tree.to_lark())


def _deserialize_and_reserialize(serialized: dict) -> dict:
"""Deserialize a Python dict back through the rule tree and reserialize."""
deserializer = BaseDeserializer()
Expand Down Expand Up @@ -120,6 +128,33 @@ def test_hcl_to_json(self):
)


class TestDirectReconstruction(TestCase):
    """Test the HCL2 → IR → HCL2 direct pipeline.

    For every suite: parse the original HCL, transform it to the IR,
    convert the IR directly to a Lark tree (skipping serialization to
    dict), reconstruct HCL from that tree, and require the reconstructed
    text to match the original source exactly.
    """

    # Show full diffs on mismatch — reconstructed files can be long.
    maxDiff = None

    def test_direct_reconstruct(self):
        for suite in _get_suites():
            with self.subTest(suite=suite):
                source_path = _get_suite_file(suite, SuiteStep.ORIGINAL)
                expected = source_path.read_text()

                # Direct: HCL → IR → Lark → HCL
                actual = _direct_reconstruct(expected)

                self.assertMultiLineEqual(
                    actual,
                    expected,
                    f"Direct reconstruction mismatch for suite {suite}: "
                    f"HCL → IR → HCL did not match original HCL",
                )


class TestRoundTripReserialization(TestCase):
"""Test JSON → JSON reserialization.

Expand Down
Loading