From ba0039faea5d03e7614638145f2925e4ce8b0086 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Tue, 10 Mar 2026 14:26:58 +0100 Subject: [PATCH 1/4] Add postlexer to support multiline binary operators and ternary expressions --- CLAUDE.md | 23 ++- hcl2/parser.py | 3 + hcl2/postlexer.py | 80 ++++++++++ hcl2/reconstructor.py | 5 + hcl2/rules/expressions.py | 14 +- hcl2/transformer.py | 42 ++++- test/integration/hcl2_original/smoke.tf | 22 +++ test/integration/hcl2_reconstructed/smoke.tf | 14 ++ test/integration/json_reserialized/smoke.json | 12 ++ test/integration/json_serialized/smoke.json | 12 ++ test/unit/rules/test_expressions.py | 11 +- test/unit/test_postlexer.py | 150 ++++++++++++++++++ 12 files changed, 370 insertions(+), 18 deletions(-) create mode 100644 hcl2/postlexer.py create mode 100644 test/unit/test_postlexer.py diff --git a/CLAUDE.md b/CLAUDE.md index e09f58c2..833629f0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -3,16 +3,20 @@ ## Pipeline ``` -Forward: HCL2 Text → Lark Parse Tree → LarkElement Tree → Python Dict/JSON +Forward: HCL2 Text → [PostLexer] → Lark Parse Tree → LarkElement Tree → Python Dict/JSON Reverse: Python Dict/JSON → LarkElement Tree → Lark Tree → HCL2 Text +Direct: HCL2 Text → [PostLexer] → Lark Parse Tree → LarkElement Tree → Lark Tree → HCL2 Text ``` +The **Direct** pipeline (`parse_to_tree` → `transform` → `to_lark` → `reconstruct`) skips serialization to dict, so all IR nodes (including `NewLineOrCommentRule` nodes for whitespace/comments) directly influence the reconstructed output. Any information discarded before the IR is lost in this pipeline. 
+ ## Module Map | Module | Role | |---|---| | `hcl2/hcl2.lark` | Lark grammar definition | | `hcl2/api.py` | Public API (`load/loads/dump/dumps` + intermediate stages) | +| `hcl2/postlexer.py` | Token stream transforms between lexer and parser | | `hcl2/parser.py` | Lark parser factory with caching | | `hcl2/transformer.py` | Lark parse tree → LarkElement tree | | `hcl2/deserializer.py` | Python dict → LarkElement tree | @@ -73,6 +77,16 @@ jsontohcl2 --indent 4 --no-align file.json Add new options as `parser.add_argument()` calls in the relevant entry point module. +## PostLexer (`postlexer.py`) + +Lark's `postlex` parameter accepts a single object with a `process(stream)` method that transforms the token stream between the lexer and LALR parser. The `PostLexer` class is designed for extensibility: each transformation is a private method that accepts and yields tokens, and `process()` chains them together. + +Current passes: + +- `_merge_newlines_into_operators` + +To add a new pass: create a private method with the same `(self, stream) -> generator` signature, and add a `yield from` call in `process()`. + ## Hard Rules These are project-specific constraints that must not be violated: @@ -88,6 +102,7 @@ These are project-specific constraints that must not be violated: ## Adding a New Language Construct 1. Add grammar rules to `hcl2.lark` +1. If the new construct creates LALR ambiguities with `NL_OR_COMMENT`, add a postlexer pass in `postlexer.py` 1. Create rule class(es) in the appropriate `rules/` file 1. Add transformer method(s) in `transformer.py` 1. Implement `serialize()` in the rule class @@ -103,9 +118,9 @@ python -m unittest discover -s test -p "test_*.py" -v **Unit tests** (`test/unit/`): instantiate rule objects directly (no parsing). 
-- `test/unit/rules/` — one file per rules module -- `test/unit/cli/` — one file per CLI module -- `test/unit/test_api.py`, `test_builder.py`, `test_deserializer.py`, `test_formatter.py`, `test_reconstructor.py`, `test_utils.py` +- `rules/` — one file per rules module +- `cli/` — one file per CLI module +- `test_*.py` — tests for corresponding files from `hcl2/` directory Use concrete stubs when testing ABCs (e.g., `StubExpression(ExpressionRule)`). diff --git a/hcl2/parser.py b/hcl2/parser.py index a33fe5f8..d275a589 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -4,6 +4,8 @@ from lark import Lark +from hcl2.postlexer import PostLexer + PARSER_FILE = Path(__file__).absolute().resolve().parent / ".lark_cache.bin" @@ -17,4 +19,5 @@ def parser() -> Lark: cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, propagate_positions=True, + postlex=PostLexer(), ) diff --git a/hcl2/postlexer.py b/hcl2/postlexer.py new file mode 100644 index 00000000..0418f362 --- /dev/null +++ b/hcl2/postlexer.py @@ -0,0 +1,80 @@ +"""Postlexer that transforms the token stream between the Lark lexer and parser. + +Each transformation is implemented as a private method that accepts and yields +tokens. The public ``process`` method chains them together, making it easy to +add new passes without touching existing logic. +""" + +from collections.abc import Iterator +from typing import FrozenSet, Optional, Tuple + +from lark import Token + +# Type alias for a token stream consumed and produced by each pass. +TokenStream = Iterator[Token] + +# Operator token types that may legally follow a line-continuation newline. +# MINUS is excluded — it is also the unary negation operator, and merging a +# newline into MINUS would incorrectly consume statement-separating newlines +# before negative literals (e.g. "a = 1\nb = -2"). 
+OPERATOR_TYPES: FrozenSet[str] = frozenset( + { + "DOUBLE_EQ", + "NEQ", + "LT", + "GT", + "LEQ", + "GEQ", + "ASTERISK", + "SLASH", + "PERCENT", + "DOUBLE_AMP", + "DOUBLE_PIPE", + "PLUS", + "QMARK", + } +) + + +class PostLexer: + """Transform the token stream before it reaches the LALR parser.""" + + def process(self, stream: TokenStream) -> TokenStream: + """Chain all postlexer passes over the token stream.""" + yield from self._merge_newlines_into_operators(stream) + + def _merge_newlines_into_operators(self, stream: TokenStream) -> TokenStream: + """Merge NL_OR_COMMENT tokens into immediately following operator tokens. + + LALR parsers cannot distinguish a statement-ending newline from a + line-continuation newline before a binary operator. This pass resolves + the ambiguity by merging NL_OR_COMMENT into the operator token's value + when the next token is a binary operator or QMARK. The transformer + later extracts the newline prefix and creates a NewLineOrCommentRule + node, preserving round-trip fidelity. + """ + pending_nl: Optional[Token] = None + for token in stream: + if token.type == "NL_OR_COMMENT": + if pending_nl is not None: + yield pending_nl + pending_nl = token + else: + if pending_nl is not None: + if token.type in OPERATOR_TYPES: + token = token.update(value=str(pending_nl) + str(token)) + else: + yield pending_nl + pending_nl = None + yield token + if pending_nl is not None: + yield pending_nl + + @property + def always_accept(self) -> Tuple[()]: + """Terminal names the parser must accept even when not expected by LALR. + + Lark requires this property on postlexer objects. An empty tuple + means no extra terminals are injected. 
+ """ + return () diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 4760665a..cd1538ec 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -183,6 +183,11 @@ def _reconstruct_tree( # Check spacing BEFORE processing children, while _last_rule_name # still reflects the previous sibling (not a child of this tree). needs_space = self._should_add_space_before(tree, parent_rule_name) + if needs_space: + # A space will be inserted before this tree's output, so tell + # children that the last character was a space to prevent the + # first child from adding a duplicate leading space. + self._last_was_space = True if rule_name == UnaryOpRule.lark_name(): for i, child in enumerate(tree.children): diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py index c29859a3..6636d4c8 100644 --- a/hcl2/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -122,6 +122,7 @@ class ConditionalRule(ExpressionRule): _children_layout: Tuple[ ExpressionRule, + Optional[NewLineOrCommentRule], QMARK, Optional[NewLineOrCommentRule], ExpressionRule, @@ -137,7 +138,7 @@ def lark_name() -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): - self._insert_optionals(children, [2, 4, 6]) + self._insert_optionals(children, [1, 3, 5, 7]) super().__init__(children, meta) @property @@ -148,12 +149,12 @@ def condition(self) -> ExpressionRule: @property def if_true(self) -> ExpressionRule: """Return the true-branch expression.""" - return self._children[3] + return self._children[4] @property def if_false(self) -> ExpressionRule: """Return the false-branch expression.""" - return self._children[7] + return self._children[8] def serialize( self, options=SerializationOptions(), context=SerializationContext() @@ -179,6 +180,7 @@ class BinaryTermRule(ExpressionRule): """Rule for the operator+operand portion of a binary operation.""" _children_layout: Tuple[ + Optional[NewLineOrCommentRule], BinaryOperatorRule, 
Optional[NewLineOrCommentRule], ExprTermRule, @@ -190,18 +192,18 @@ def lark_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): - self._insert_optionals(children, [1]) + self._insert_optionals(children, [0, 2]) super().__init__(children, meta) @property def binary_operator(self) -> BinaryOperatorRule: """Return the binary operator.""" - return self._children[0] + return self._children[1] @property def expr_term(self) -> ExprTermRule: """Return the right-hand operand.""" - return self._children[2] + return self._children[3] def serialize( self, options=SerializationOptions(), context=SerializationContext() diff --git a/hcl2/transformer.py b/hcl2/transformer.py index d0a09630..f6e94004 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -81,9 +81,15 @@ def __init__(self, discard_new_line_or_comments: bool = False): def __default_token__(self, token: Token) -> StringToken: # TODO make this return StaticStringToken where applicable - if token.value in StaticStringToken.classes_by_value: - return StaticStringToken.classes_by_value[token.value]() - return StringToken[token.type](token.value) # type: ignore[misc] + value = token.value + # The EQ terminal /[ \t]*=(?!=|>)/ captures leading whitespace. + # Strip it so parsed and deserialized tokens produce the same "=" value, + # preventing the reconstructor from emitting double spaces. + if token.type == "EQ": + value = value.lstrip(" \t") + if value in StaticStringToken.classes_by_value: + return StaticStringToken.classes_by_value[value]() + return StringToken[token.type](value) # type: ignore[misc] # pylint: disable=C0103 def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: @@ -164,8 +170,32 @@ def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: def expr_term(self, meta: Meta, args) -> ExprTermRule: return ExprTermRule(args, meta) + def _extract_nl_prefix(self, token): + """Strip leading newlines from a token value. 
+ + If the token contains a newline prefix (from the postlexer merging a + line-continuation newline into the operator token), strip it and + return a NewLineOrCommentRule. Otherwise return None. + """ + value = str(token.value) + stripped = value.lstrip("\n \t") + if len(stripped) == len(value): + return None + nl_text = value[: len(value) - len(stripped)] + token.set_value(stripped) + if self.discard_new_line_or_comments: + return None + return NewLineOrCommentRule.from_string(nl_text) + @v_args(meta=True) def conditional(self, meta: Meta, args) -> ConditionalRule: + # args: [condition, QMARK, NL?, if_true, NL?, COLON, NL?, if_false] + # QMARK is at index 1 — check for NL prefix from the postlexer + qmark_token = args[1] + nl_rule = self._extract_nl_prefix(qmark_token) + if nl_rule is not None: + args = list(args) + args.insert(1, nl_rule) return ConditionalRule(args, meta) @v_args(meta=True) @@ -174,6 +204,12 @@ def binary_operator(self, meta: Meta, args) -> BinaryOperatorRule: @v_args(meta=True) def binary_term(self, meta: Meta, args) -> BinaryTermRule: + # args: [BinaryOperatorRule, NL?, ExprTermRule] + # The operator's token may contain a NL prefix from the postlexer + op_rule = args[0] + nl_rule = self._extract_nl_prefix(op_rule.token) + if nl_rule is not None: + args = [nl_rule] + list(args) return BinaryTermRule(args, meta) @v_args(meta=True) diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index 99537532..32e22466 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -43,6 +43,28 @@ block label1 label2 { } } +block multiline_ternary { + foo = ( + bar + ? baz(foo) + : foo == "bar" + ? 
"baz" + : foo + ) +} + +block multiline_binary_ops { + expr = { + for k, v in local.map_a : k => v + if lookup(local.map_b[v.id + ], "enabled", false) + || ( + contains(local.map_c, v.id) + && contains(local.map_d, v.id) + ) + } +} + block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index c8529e70..655ee440 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -39,6 +39,20 @@ block label1 label2 { } +block multiline_ternary { + foo = (bar ? baz(foo) : foo == "bar" ? "baz" : foo) +} + + +block multiline_binary_ops { + expr = { + for k, v in local.map_a : + k => v + if lookup(local.map_b[v.id], "enabled", false) || (contains(local.map_c, v.id) && contains(local.map_d, v.id)) + } +} + + block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index dbff114f..ec003473 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -48,6 +48,18 @@ } } }, + { + "multiline_ternary": { + "foo": "${(bar ? baz(foo) : foo == \"bar\" ? 
\"baz\" : foo)}", + "__is_block__": true + } + }, + { + "multiline_binary_ops": { + "expr": "${{for k, v in local.map_a : k => v if lookup(local.map_b[v.id], \"enabled\", false) || (contains(local.map_c, v.id) && contains(local.map_d, v.id))}}", + "__is_block__": true + } + }, { "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", "__is_block__": true diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index dbff114f..ec003473 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -48,6 +48,18 @@ } } }, + { + "multiline_ternary": { + "foo": "${(bar ? baz(foo) : foo == \"bar\" ? \"baz\" : foo)}", + "__is_block__": true + } + }, + { + "multiline_binary_ops": { + "expr": "${{for k, v in local.map_a : k => v if lookup(local.map_b[v.id], \"enabled\", false) || (contains(local.map_c, v.id) && contains(local.map_d, v.id))}}", + "__is_block__": true + } + }, { "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... 
if substr(bucket_name, 0, 1) == \"l\"}}", "__is_block__": true diff --git a/test/unit/rules/test_expressions.py b/test/unit/rules/test_expressions.py index 974885b5..8ab7e8db 100644 --- a/test/unit/rules/test_expressions.py +++ b/test/unit/rules/test_expressions.py @@ -194,8 +194,8 @@ def test_lark_name(self): def test_construction_inserts_optional_slots(self): rule = self._make_conditional() - # Should have 8 children after _insert_optionals at [2, 4, 6] - self.assertEqual(len(rule.children), 8) + # Should have 9 children after _insert_optionals at [1, 3, 5, 7] + self.assertEqual(len(rule.children), 9) def test_condition_property(self): cond = StubExpression("cond") @@ -274,9 +274,10 @@ def test_lark_name(self): def test_construction_inserts_optional(self): rule = _make_binary_term("+", "b") - # [BinaryOperatorRule, None, ExprTermRule] - self.assertEqual(len(rule.children), 3) - self.assertIsNone(rule.children[1]) + # [None, BinaryOperatorRule, None, ExprTermRule] + self.assertEqual(len(rule.children), 4) + self.assertIsNone(rule.children[0]) + self.assertIsNone(rule.children[2]) def test_binary_operator_property(self): op = _make_binary_operator("+") diff --git a/test/unit/test_postlexer.py b/test/unit/test_postlexer.py new file mode 100644 index 00000000..07a8164b --- /dev/null +++ b/test/unit/test_postlexer.py @@ -0,0 +1,150 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +"""Unit tests for hcl2.postlexer. + +Tests parse real HCL2 snippets through the full pipeline to verify that the +postlexer correctly handles newlines before binary operators and QMARK. 
+""" + +from unittest import TestCase + +from lark import Token + +from hcl2.api import loads +from hcl2.postlexer import OPERATOR_TYPES, PostLexer + + +class TestMergeNewlinesIntoOperators(TestCase): + """Test _merge_newlines_into_operators at the token-stream level.""" + + def _run(self, tokens): + """Run the postlexer pass and return a list of tokens.""" + return list(PostLexer()._merge_newlines_into_operators(iter(tokens))) + + def test_no_newlines_passes_through(self): + tokens = [Token("NAME", "a"), Token("PLUS", "+"), Token("NAME", "b")] + result = self._run(tokens) + self.assertEqual(len(result), 3) + self.assertEqual(result[1].type, "PLUS") + self.assertEqual(str(result[1]), "+") + + def test_newline_before_operator_is_merged(self): + tokens = [ + Token("NAME", "a"), + Token("NL_OR_COMMENT", "\n "), + Token("PLUS", "+"), + Token("NAME", "b"), + ] + result = self._run(tokens) + self.assertEqual(len(result), 3) + self.assertEqual(result[1].type, "PLUS") + self.assertEqual(str(result[1]), "\n +") + + def test_newline_before_non_operator_is_preserved(self): + tokens = [ + Token("NAME", "a"), + Token("NL_OR_COMMENT", "\n"), + Token("NAME", "b"), + ] + result = self._run(tokens) + self.assertEqual(len(result), 3) + self.assertEqual(result[1].type, "NL_OR_COMMENT") + + def test_consecutive_newlines_first_yielded(self): + tokens = [ + Token("NL_OR_COMMENT", "\n"), + Token("NL_OR_COMMENT", "\n "), + Token("PLUS", "+"), + ] + result = self._run(tokens) + self.assertEqual(len(result), 2) + self.assertEqual(result[0].type, "NL_OR_COMMENT") + self.assertEqual(str(result[0]), "\n") + self.assertEqual(result[1].type, "PLUS") + self.assertEqual(str(result[1]), "\n +") + + def test_trailing_newline_is_yielded(self): + tokens = [Token("NAME", "a"), Token("NL_OR_COMMENT", "\n")] + result = self._run(tokens) + self.assertEqual(len(result), 2) + self.assertEqual(result[1].type, "NL_OR_COMMENT") + + def test_all_operator_types_are_merged(self): + for op_type in 
sorted(OPERATOR_TYPES): + with self.subTest(op_type=op_type): + tokens = [ + Token("NL_OR_COMMENT", "\n"), + Token(op_type, "x"), + ] + result = self._run(tokens) + self.assertEqual(len(result), 1) + self.assertEqual(result[0].type, op_type) + self.assertTrue(str(result[0]).startswith("\n")) + + def test_minus_not_in_operator_types(self): + self.assertNotIn("MINUS", OPERATOR_TYPES) + + +class TestMultilineOperatorParsing(TestCase): + """Test that HCL2 snippets with multiline operators parse correctly.""" + + def test_multiline_ternary(self): + hcl = 'x = (\n a\n ? "yes"\n : "no"\n)\n' + result = loads(hcl) + self.assertEqual(result["x"], '${(a ? "yes" : "no")}') + + def test_multiline_logical_or(self): + hcl = "x = (\n a\n || b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a || b)}") + + def test_multiline_logical_and(self): + hcl = "x = (\n a\n && b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a && b)}") + + def test_multiline_equality(self): + hcl = "x = (\n a\n == b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a == b)}") + + def test_multiline_not_equal(self): + hcl = "x = (\n a\n != b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a != b)}") + + def test_multiline_comparison(self): + hcl = "x = (\n a\n >= b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a >= b)}") + + def test_multiline_addition(self): + hcl = "x = (\n a\n + b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a + b)}") + + def test_multiline_multiplication(self): + hcl = "x = (\n a\n * b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a * b)}") + + def test_multiline_chained_operators(self): + hcl = "x = (\n a\n && b\n && c\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a && b && c)}") + + def test_multiline_nested_ternary(self): + hcl = 'x = (\n a\n ? b\n : c == "d"\n ? "e"\n : f\n)\n' + result = loads(hcl) + self.assertEqual(result["x"], '${(a ? 
b : c == "d" ? "e" : f)}') + + def test_minus_on_new_line_is_separate_attribute(self): + """MINUS is excluded from merging — newline before - starts a new statement.""" + hcl = "a = 1\nb = -2\n" + result = loads(hcl) + self.assertEqual(result["a"], 1) + self.assertIn("b", result) + + def test_single_line_operators_still_work(self): + hcl = "x = a + b\n" + result = loads(hcl) + self.assertEqual(result["x"], "${a + b}") From a3bc62cbaead219648d5e635d79b5e15713d8672 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Tue, 10 Mar 2026 15:05:23 +0100 Subject: [PATCH 2/4] more robust whitespace handling during reconstruction --- hcl2/formatter.py | 12 ++++-- hcl2/hcl2.lark | 2 +- hcl2/reconstructor.py | 45 +++++++++++++++++--- hcl2/rules/expressions.py | 7 ++- hcl2/transformer.py | 8 ++-- test/integration/hcl2_original/smoke.tf | 2 +- test/integration/hcl2_reconstructed/smoke.tf | 2 +- test/integration/test_round_trip.py | 35 +++++++++++++++ 8 files changed, 95 insertions(+), 18 deletions(-) diff --git a/hcl2/formatter.py b/hcl2/formatter.py index 20c3dced..5fc1c1b3 100644 --- a/hcl2/formatter.py +++ b/hcl2/formatter.py @@ -272,8 +272,8 @@ def _align_attributes_sequence(self, attributes_sequence: List[AttributeRule]): for attribute in attributes_sequence: name_length = len(attribute.identifier.token.value) spaces_to_add = max_length - name_length - base = attribute.children[1].value.lstrip(" ") - attribute.children[1].set_value(" " * spaces_to_add + base) + base = attribute.children[1].value.lstrip(" \t") + attribute.children[1].set_value(" " * (spaces_to_add + 1) + base) def _vertically_align_object_elems(self, rule: ObjectRule): max_length = max(self._key_text_width(elem.key) for elem in rule.elements) @@ -286,8 +286,12 @@ def _vertically_align_object_elems(self, rule: ObjectRule): if isinstance(separator, COLON): # type: ignore[misc] spaces_to_add += 1 - base = separator.value.lstrip(" ") - elem.children[1].set_value(" " * spaces_to_add + base) + base = 
separator.value.lstrip(" \t") + # EQ gets +1 for the base space (reconstructor no longer adds it + # when the token already has leading whitespace); COLON already + # has its own +1 above and the reconstructor doesn't add space. + extra = 1 if not isinstance(separator, COLON) else 0 # type: ignore[misc] + elem.children[1].set_value(" " * (spaces_to_add + extra) + base) @staticmethod def _key_text_width(key: LarkElement) -> int: diff --git a/hcl2/hcl2.lark b/hcl2/hcl2.lark index f0248290..a5c5fba6 100644 --- a/hcl2/hcl2.lark +++ b/hcl2/hcl2.lark @@ -50,7 +50,7 @@ RSQB : "]" COMMA : "," DOT : "." EQ : /[ \t]*=(?!=|>)/ -COLON : ":" +COLON : /[ \t]*:(?!:)/ DBLQUOTE : "\"" // Interpolation diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index cd1538ec..2a9ca708 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -4,6 +4,7 @@ from lark import Tree, Token from hcl2.rules import tokens from hcl2.rules.base import BlockRule +from hcl2.rules.containers import ObjectElemRule from hcl2.rules.for_expressions import ForIntroRule, ForTupleExprRule, ForObjectExprRule from hcl2.rules.literal_rules import IdentifierRule from hcl2.rules.strings import StringRule @@ -76,14 +77,20 @@ def _should_add_space_before( or self._last_token_name in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] ): + # COLON may already carry leading whitespace from the grammar + if token_type == tokens.COLON.lark_name() and str( + current_node + ).startswith((" ", "\t")): + return False return True - # Space after + # Space before colon in for_intro if ( parent_rule_name == ForIntroRule.lark_name() and token_type == tokens.COLON.lark_name() ): - + if str(current_node).startswith((" ", "\t")): + return False return True # Space after commas in tuples and function arguments... @@ -120,10 +127,28 @@ def _should_add_space_before( return True # Space after ellipsis in function arguments + # ... 
except before newlines which provide their own whitespace if self._last_token_name == tokens.ELLIPSIS.lark_name(): + if token_type == "NL_OR_COMMENT": + return False return True - if tokens.EQ.lark_name() in [token_type, self._last_token_name]: + # Space around EQ and COLON separators in attributes/object elements. + # Both terminals may carry leading whitespace from the original + # source (e.g. " =" for aligned attributes, " :" for object + # elements). Skip the automatic space when the token already + # provides it. COLON only gets space if it already has leading + # whitespace (unlike EQ which always gets at least one space). + if token_type == tokens.EQ.lark_name(): + if str(current_node).startswith((" ", "\t")): + return False + return True + if token_type == tokens.COLON.lark_name(): + return False + if self._last_token_name == tokens.EQ.lark_name(): + # Don't add space before newlines which provide their own whitespace + if token_type == "NL_OR_COMMENT": + return False return True # Don't add space around operator tokens inside unary_op @@ -158,14 +183,16 @@ def _should_add_space_before( ): return True - # Space after colon in for expressions (before value expression, - # but not before newline/comment which provides its own whitespace) + # Space after colon in for expressions and object elements + # (before value expression, but not before newline/comment + # which provides its own whitespace) if ( self._last_token_name == tokens.COLON.lark_name() and parent_rule_name in [ ForTupleExprRule.lark_name(), ForObjectExprRule.lark_name(), + ObjectElemRule.lark_name(), ] and rule_name != "new_line_or_comment" ): @@ -253,6 +280,14 @@ def reconstruct(self, tree: Tree, postproc=None) -> str: if postproc: result = postproc(result) + # The grammar's body rule ends with an optional new_line_or_comment + # which captures the final newline. 
The parser often produces two + # NL_OR_COMMENT tokens for a single trailing newline (the statement + # separator plus the EOF newline), resulting in a spurious blank line. + # Strip exactly one trailing newline when there are two or more. + if result.endswith("\n\n"): + result = result[:-1] + # Ensure file ends with newline if result and not result.endswith("\n"): result += "\n" diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py index 6636d4c8..f29eea5a 100644 --- a/hcl2/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -248,7 +248,9 @@ def serialize( """Serialize to 'lhs operator rhs' string.""" with context.modify(inside_dollar_string=True): lhs = self.expr_term.serialize(options, context) - operator = self.binary_term.binary_operator.serialize(options, context) + operator = str( + self.binary_term.binary_operator.serialize(options, context) + ).strip() rhs = self.binary_term.expr_term.serialize(options, context) result = f"{lhs} {operator} {rhs}" @@ -286,7 +288,8 @@ def serialize( ) -> Any: """Serialize to 'operator operand' string.""" with context.modify(inside_dollar_string=True): - result = f"{self.operator}{self.expr_term.serialize(options, context)}" + operator = self.operator.rstrip() + result = f"{operator}{self.expr_term.serialize(options, context)}" if not context.inside_dollar_string: result = to_dollar_string(result) diff --git a/hcl2/transformer.py b/hcl2/transformer.py index f6e94004..7b067462 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -83,10 +83,10 @@ def __default_token__(self, token: Token) -> StringToken: # TODO make this return StaticStringToken where applicable value = token.value # The EQ terminal /[ \t]*=(?!=|>)/ captures leading whitespace. - # Strip it so parsed and deserialized tokens produce the same "=" value, - # preventing the reconstructor from emitting double spaces. 
- if token.type == "EQ": - value = value.lstrip(" \t") + # Preserve it so the direct pipeline (to_lark → reconstruct) retains + # original alignment. The reconstructor skips its own space insertion + # when the EQ token already carries leading whitespace. + if value in StaticStringToken.classes_by_value: return StaticStringToken.classes_by_value[value]() return StringToken[token.type](value) # type: ignore[misc] diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index 32e22466..e2cb9bb8 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -3,7 +3,7 @@ block label1 label2 { a = 5 b = 1256.5 c = 15 + (10 * 12) - d = (- a) + d = (-a) e = ( a == b ? true : false diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index 655ee440..54b3286c 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -61,7 +61,7 @@ block { for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs["aws_account_id"] ] - } ... + } ... 
if substr(bucket_name, 0, 1) == "l" } } diff --git a/test/integration/test_round_trip.py b/test/integration/test_round_trip.py index 67217f07..b78141cb 100644 --- a/test/integration/test_round_trip.py +++ b/test/integration/test_round_trip.py @@ -79,6 +79,14 @@ def _parse_and_serialize(hcl_text: str, options=None) -> dict: return rules.serialize() +def _direct_reconstruct(hcl_text: str) -> str: + """Parse HCL text, transform to IR, convert to Lark tree, and reconstruct.""" + parsed_tree = parses_to_tree(hcl_text) + rules = RuleTransformer().transform(parsed_tree) + lark_tree = rules.to_lark() + return HCLReconstructor().reconstruct(lark_tree) + + def _deserialize_and_reserialize(serialized: dict) -> dict: """Deserialize a Python dict back through the rule tree and reserialize.""" deserializer = BaseDeserializer() @@ -120,6 +128,33 @@ def test_hcl_to_json(self): ) +class TestDirectReconstruction(TestCase): + """Test HCL2 → IR → HCL2 direct pipeline. + + Parse HCL, transform to IR, convert directly to Lark tree (skipping + serialization to dict), reconstruct HCL, then verify the result + re-parses to the same JSON as the original. + """ + + maxDiff = None + + def test_direct_reconstruct(self): + for suite in _get_suites(): + with self.subTest(suite=suite): + hcl_path = _get_suite_file(suite, SuiteStep.ORIGINAL) + original_hcl = hcl_path.read_text() + + # Direct: HCL → IR → Lark → HCL + reconstructed_hcl = _direct_reconstruct(original_hcl) + + self.assertMultiLineEqual( + reconstructed_hcl, + original_hcl, + f"Direct reconstruction mismatch for suite {suite}: " + f"HCL → IR → HCL did not match original HCL", + ) + + class TestRoundTripReserialization(TestCase): """Test JSON → JSON reserialization. 
From c9a10d96f6781c00aca4277e0adc2881f362ee63 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Tue, 10 Mar 2026 14:26:58 +0100 Subject: [PATCH 3/4] Add postlexer to support multiline binary operators and ternary expressions --- CLAUDE.md | 23 ++- hcl2/parser.py | 3 + hcl2/postlexer.py | 79 +++++++++ hcl2/reconstructor.py | 5 + hcl2/rules/expressions.py | 14 +- hcl2/transformer.py | 42 ++++- test/integration/hcl2_original/smoke.tf | 22 +++ test/integration/hcl2_reconstructed/smoke.tf | 14 ++ test/integration/json_reserialized/smoke.json | 12 ++ test/integration/json_serialized/smoke.json | 12 ++ test/unit/rules/test_expressions.py | 11 +- test/unit/test_postlexer.py | 150 ++++++++++++++++++ 12 files changed, 369 insertions(+), 18 deletions(-) create mode 100644 hcl2/postlexer.py create mode 100644 test/unit/test_postlexer.py diff --git a/CLAUDE.md b/CLAUDE.md index e09f58c2..833629f0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -3,16 +3,20 @@ ## Pipeline ``` -Forward: HCL2 Text → Lark Parse Tree → LarkElement Tree → Python Dict/JSON +Forward: HCL2 Text → [PostLexer] → Lark Parse Tree → LarkElement Tree → Python Dict/JSON Reverse: Python Dict/JSON → LarkElement Tree → Lark Tree → HCL2 Text +Direct: HCL2 Text → [PostLexer] → Lark Parse Tree → LarkElement Tree → Lark Tree → HCL2 Text ``` +The **Direct** pipeline (`parse_to_tree` → `transform` → `to_lark` → `reconstruct`) skips serialization to dict, so all IR nodes (including `NewLineOrCommentRule` nodes for whitespace/comments) directly influence the reconstructed output. Any information discarded before the IR is lost in this pipeline. 
+ ## Module Map | Module | Role | |---|---| | `hcl2/hcl2.lark` | Lark grammar definition | | `hcl2/api.py` | Public API (`load/loads/dump/dumps` + intermediate stages) | +| `hcl2/postlexer.py` | Token stream transforms between lexer and parser | | `hcl2/parser.py` | Lark parser factory with caching | | `hcl2/transformer.py` | Lark parse tree → LarkElement tree | | `hcl2/deserializer.py` | Python dict → LarkElement tree | @@ -73,6 +77,16 @@ jsontohcl2 --indent 4 --no-align file.json Add new options as `parser.add_argument()` calls in the relevant entry point module. +## PostLexer (`postlexer.py`) + +Lark's `postlex` parameter accepts a single object with a `process(stream)` method that transforms the token stream between the lexer and LALR parser. The `PostLexer` class is designed for extensibility: each transformation is a private method that accepts and yields tokens, and `process()` chains them together. + +Current passes: + +- `_merge_newlines_into_operators` + +To add a new pass: create a private method with the same `(self, stream) -> generator` signature, and add a `yield from` call in `process()`. + ## Hard Rules These are project-specific constraints that must not be violated: @@ -88,6 +102,7 @@ These are project-specific constraints that must not be violated: ## Adding a New Language Construct 1. Add grammar rules to `hcl2.lark` +1. If the new construct creates LALR ambiguities with `NL_OR_COMMENT`, add a postlexer pass in `postlexer.py` 1. Create rule class(es) in the appropriate `rules/` file 1. Add transformer method(s) in `transformer.py` 1. Implement `serialize()` in the rule class @@ -103,9 +118,9 @@ python -m unittest discover -s test -p "test_*.py" -v **Unit tests** (`test/unit/`): instantiate rule objects directly (no parsing). 
-- `test/unit/rules/` — one file per rules module -- `test/unit/cli/` — one file per CLI module -- `test/unit/test_api.py`, `test_builder.py`, `test_deserializer.py`, `test_formatter.py`, `test_reconstructor.py`, `test_utils.py` +- `rules/` — one file per rules module +- `cli/` — one file per CLI module +- `test_*.py` — tests for corresponding files from `hcl2/` directory Use concrete stubs when testing ABCs (e.g., `StubExpression(ExpressionRule)`). diff --git a/hcl2/parser.py b/hcl2/parser.py index a33fe5f8..d275a589 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -4,6 +4,8 @@ from lark import Lark +from hcl2.postlexer import PostLexer + PARSER_FILE = Path(__file__).absolute().resolve().parent / ".lark_cache.bin" @@ -17,4 +19,5 @@ def parser() -> Lark: cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, propagate_positions=True, + postlex=PostLexer(), ) diff --git a/hcl2/postlexer.py b/hcl2/postlexer.py new file mode 100644 index 00000000..1d22cc03 --- /dev/null +++ b/hcl2/postlexer.py @@ -0,0 +1,79 @@ +"""Postlexer that transforms the token stream between the Lark lexer and parser. + +Each transformation is implemented as a private method that accepts and yields +tokens. The public ``process`` method chains them together, making it easy to +add new passes without touching existing logic. +""" + +from typing import FrozenSet, Iterator, Optional, Tuple + +from lark import Token + +# Type alias for a token stream consumed and produced by each pass. +TokenStream = Iterator[Token] + +# Operator token types that may legally follow a line-continuation newline. +# MINUS is excluded — it is also the unary negation operator, and merging a +# newline into MINUS would incorrectly consume statement-separating newlines +# before negative literals (e.g. "a = 1\nb = -2"). 
+OPERATOR_TYPES: FrozenSet[str] = frozenset( + { + "DOUBLE_EQ", + "NEQ", + "LT", + "GT", + "LEQ", + "GEQ", + "ASTERISK", + "SLASH", + "PERCENT", + "DOUBLE_AMP", + "DOUBLE_PIPE", + "PLUS", + "QMARK", + } +) + + +class PostLexer: + """Transform the token stream before it reaches the LALR parser.""" + + def process(self, stream: TokenStream) -> TokenStream: + """Chain all postlexer passes over the token stream.""" + yield from self._merge_newlines_into_operators(stream) + + def _merge_newlines_into_operators(self, stream: TokenStream) -> TokenStream: + """Merge NL_OR_COMMENT tokens into immediately following operator tokens. + + LALR parsers cannot distinguish a statement-ending newline from a + line-continuation newline before a binary operator. This pass resolves + the ambiguity by merging NL_OR_COMMENT into the operator token's value + when the next token is a binary operator or QMARK. The transformer + later extracts the newline prefix and creates a NewLineOrCommentRule + node, preserving round-trip fidelity. + """ + pending_nl: Optional[Token] = None + for token in stream: + if token.type == "NL_OR_COMMENT": + if pending_nl is not None: + yield pending_nl + pending_nl = token + else: + if pending_nl is not None: + if token.type in OPERATOR_TYPES: + token = token.update(value=str(pending_nl) + str(token)) + else: + yield pending_nl + pending_nl = None + yield token + if pending_nl is not None: + yield pending_nl + + @property + def always_accept(self) -> Tuple[()]: + """Terminal names the parser must accept even when not expected by LALR. + + Lark requires this property on postlexer objects. An empty tuple + means no extra terminals are injected. 
+ """ + return () diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 4760665a..cd1538ec 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -183,6 +183,11 @@ def _reconstruct_tree( # Check spacing BEFORE processing children, while _last_rule_name # still reflects the previous sibling (not a child of this tree). needs_space = self._should_add_space_before(tree, parent_rule_name) + if needs_space: + # A space will be inserted before this tree's output, so tell + # children that the last character was a space to prevent the + # first child from adding a duplicate leading space. + self._last_was_space = True if rule_name == UnaryOpRule.lark_name(): for i, child in enumerate(tree.children): diff --git a/hcl2/rules/expressions.py b/hcl2/rules/expressions.py index c29859a3..6636d4c8 100644 --- a/hcl2/rules/expressions.py +++ b/hcl2/rules/expressions.py @@ -122,6 +122,7 @@ class ConditionalRule(ExpressionRule): _children_layout: Tuple[ ExpressionRule, + Optional[NewLineOrCommentRule], QMARK, Optional[NewLineOrCommentRule], ExpressionRule, @@ -137,7 +138,7 @@ def lark_name() -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): - self._insert_optionals(children, [2, 4, 6]) + self._insert_optionals(children, [1, 3, 5, 7]) super().__init__(children, meta) @property @@ -148,12 +149,12 @@ def condition(self) -> ExpressionRule: @property def if_true(self) -> ExpressionRule: """Return the true-branch expression.""" - return self._children[3] + return self._children[4] @property def if_false(self) -> ExpressionRule: """Return the false-branch expression.""" - return self._children[7] + return self._children[8] def serialize( self, options=SerializationOptions(), context=SerializationContext() @@ -179,6 +180,7 @@ class BinaryTermRule(ExpressionRule): """Rule for the operator+operand portion of a binary operation.""" _children_layout: Tuple[ + Optional[NewLineOrCommentRule], BinaryOperatorRule, 
Optional[NewLineOrCommentRule], ExprTermRule, @@ -190,18 +192,18 @@ def lark_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): - self._insert_optionals(children, [1]) + self._insert_optionals(children, [0, 2]) super().__init__(children, meta) @property def binary_operator(self) -> BinaryOperatorRule: """Return the binary operator.""" - return self._children[0] + return self._children[1] @property def expr_term(self) -> ExprTermRule: """Return the right-hand operand.""" - return self._children[2] + return self._children[3] def serialize( self, options=SerializationOptions(), context=SerializationContext() diff --git a/hcl2/transformer.py b/hcl2/transformer.py index d0a09630..f6e94004 100644 --- a/hcl2/transformer.py +++ b/hcl2/transformer.py @@ -81,9 +81,15 @@ def __init__(self, discard_new_line_or_comments: bool = False): def __default_token__(self, token: Token) -> StringToken: # TODO make this return StaticStringToken where applicable - if token.value in StaticStringToken.classes_by_value: - return StaticStringToken.classes_by_value[token.value]() - return StringToken[token.type](token.value) # type: ignore[misc] + value = token.value + # The EQ terminal /[ \t]*=(?!=|>)/ captures leading whitespace. + # Strip it so parsed and deserialized tokens produce the same "=" value, + # preventing the reconstructor from emitting double spaces. + if token.type == "EQ": + value = value.lstrip(" \t") + if value in StaticStringToken.classes_by_value: + return StaticStringToken.classes_by_value[value]() + return StringToken[token.type](value) # type: ignore[misc] # pylint: disable=C0103 def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: @@ -164,8 +170,32 @@ def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: def expr_term(self, meta: Meta, args) -> ExprTermRule: return ExprTermRule(args, meta) + def _extract_nl_prefix(self, token): + """Strip leading newlines from a token value. 
+ + If the token contains a newline prefix (from the postlexer merging a + line-continuation newline into the operator token), strip it and + return a NewLineOrCommentRule. Otherwise return None. + """ + value = str(token.value) + stripped = value.lstrip("\n \t") + if len(stripped) == len(value): + return None + nl_text = value[: len(value) - len(stripped)] + token.set_value(stripped) + if self.discard_new_line_or_comments: + return None + return NewLineOrCommentRule.from_string(nl_text) + @v_args(meta=True) def conditional(self, meta: Meta, args) -> ConditionalRule: + # args: [condition, QMARK, NL?, if_true, NL?, COLON, NL?, if_false] + # QMARK is at index 1 — check for NL prefix from the postlexer + qmark_token = args[1] + nl_rule = self._extract_nl_prefix(qmark_token) + if nl_rule is not None: + args = list(args) + args.insert(1, nl_rule) return ConditionalRule(args, meta) @v_args(meta=True) @@ -174,6 +204,12 @@ def binary_operator(self, meta: Meta, args) -> BinaryOperatorRule: @v_args(meta=True) def binary_term(self, meta: Meta, args) -> BinaryTermRule: + # args: [BinaryOperatorRule, NL?, ExprTermRule] + # The operator's token may contain a NL prefix from the postlexer + op_rule = args[0] + nl_rule = self._extract_nl_prefix(op_rule.token) + if nl_rule is not None: + args = [nl_rule] + list(args) return BinaryTermRule(args, meta) @v_args(meta=True) diff --git a/test/integration/hcl2_original/smoke.tf b/test/integration/hcl2_original/smoke.tf index 99537532..32e22466 100644 --- a/test/integration/hcl2_original/smoke.tf +++ b/test/integration/hcl2_original/smoke.tf @@ -43,6 +43,28 @@ block label1 label2 { } } +block multiline_ternary { + foo = ( + bar + ? baz(foo) + : foo == "bar" + ? 
"baz" + : foo + ) +} + +block multiline_binary_ops { + expr = { + for k, v in local.map_a : k => v + if lookup(local.map_b[v.id + ], "enabled", false) + || ( + contains(local.map_c, v.id) + && contains(local.map_d, v.id) + ) + } +} + block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : diff --git a/test/integration/hcl2_reconstructed/smoke.tf b/test/integration/hcl2_reconstructed/smoke.tf index c8529e70..655ee440 100644 --- a/test/integration/hcl2_reconstructed/smoke.tf +++ b/test/integration/hcl2_reconstructed/smoke.tf @@ -39,6 +39,20 @@ block label1 label2 { } +block multiline_ternary { + foo = (bar ? baz(foo) : foo == "bar" ? "baz" : foo) +} + + +block multiline_binary_ops { + expr = { + for k, v in local.map_a : + k => v + if lookup(local.map_b[v.id], "enabled", false) || (contains(local.map_c, v.id) && contains(local.map_d, v.id)) + } +} + + block { route53_forwarding_rule_shares = { for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : diff --git a/test/integration/json_reserialized/smoke.json b/test/integration/json_reserialized/smoke.json index dbff114f..ec003473 100644 --- a/test/integration/json_reserialized/smoke.json +++ b/test/integration/json_reserialized/smoke.json @@ -48,6 +48,18 @@ } } }, + { + "multiline_ternary": { + "foo": "${(bar ? baz(foo) : foo == \"bar\" ? 
\"baz\" : foo)}", + "__is_block__": true + } + }, + { + "multiline_binary_ops": { + "expr": "${{for k, v in local.map_a : k => v if lookup(local.map_b[v.id], \"enabled\", false) || (contains(local.map_c, v.id) && contains(local.map_d, v.id))}}", + "__is_block__": true + } + }, { "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... if substr(bucket_name, 0, 1) == \"l\"}}", "__is_block__": true diff --git a/test/integration/json_serialized/smoke.json b/test/integration/json_serialized/smoke.json index dbff114f..ec003473 100644 --- a/test/integration/json_serialized/smoke.json +++ b/test/integration/json_serialized/smoke.json @@ -48,6 +48,18 @@ } } }, + { + "multiline_ternary": { + "foo": "${(bar ? baz(foo) : foo == \"bar\" ? \"baz\" : foo)}", + "__is_block__": true + } + }, + { + "multiline_binary_ops": { + "expr": "${{for k, v in local.map_a : k => v if lookup(local.map_b[v.id], \"enabled\", false) || (contains(local.map_c, v.id) && contains(local.map_d, v.id))}}", + "__is_block__": true + } + }, { "route53_forwarding_rule_shares": "${{for forwarding_rule_key in keys(var.route53_resolver_forwarding_rule_shares) : \"${forwarding_rule_key}\" => {aws_account_ids = [for account_name in var.route53_resolver_forwarding_rule_shares[forwarding_rule_key].aws_account_names : module.remote_state_subaccounts.map[account_name].outputs[\"aws_account_id\"]]}... 
if substr(bucket_name, 0, 1) == \"l\"}}", "__is_block__": true diff --git a/test/unit/rules/test_expressions.py b/test/unit/rules/test_expressions.py index 974885b5..8ab7e8db 100644 --- a/test/unit/rules/test_expressions.py +++ b/test/unit/rules/test_expressions.py @@ -194,8 +194,8 @@ def test_lark_name(self): def test_construction_inserts_optional_slots(self): rule = self._make_conditional() - # Should have 8 children after _insert_optionals at [2, 4, 6] - self.assertEqual(len(rule.children), 8) + # Should have 9 children after _insert_optionals at [1, 3, 5, 7] + self.assertEqual(len(rule.children), 9) def test_condition_property(self): cond = StubExpression("cond") @@ -274,9 +274,10 @@ def test_lark_name(self): def test_construction_inserts_optional(self): rule = _make_binary_term("+", "b") - # [BinaryOperatorRule, None, ExprTermRule] - self.assertEqual(len(rule.children), 3) - self.assertIsNone(rule.children[1]) + # [None, BinaryOperatorRule, None, ExprTermRule] + self.assertEqual(len(rule.children), 4) + self.assertIsNone(rule.children[0]) + self.assertIsNone(rule.children[2]) def test_binary_operator_property(self): op = _make_binary_operator("+") diff --git a/test/unit/test_postlexer.py b/test/unit/test_postlexer.py new file mode 100644 index 00000000..07a8164b --- /dev/null +++ b/test/unit/test_postlexer.py @@ -0,0 +1,150 @@ +# pylint: disable=C0103,C0114,C0115,C0116 +"""Unit tests for hcl2.postlexer. + +Tests parse real HCL2 snippets through the full pipeline to verify that the +postlexer correctly handles newlines before binary operators and QMARK. 
+""" + +from unittest import TestCase + +from lark import Token + +from hcl2.api import loads +from hcl2.postlexer import OPERATOR_TYPES, PostLexer + + +class TestMergeNewlinesIntoOperators(TestCase): + """Test _merge_newlines_into_operators at the token-stream level.""" + + def _run(self, tokens): + """Run the postlexer pass and return a list of tokens.""" + return list(PostLexer()._merge_newlines_into_operators(iter(tokens))) + + def test_no_newlines_passes_through(self): + tokens = [Token("NAME", "a"), Token("PLUS", "+"), Token("NAME", "b")] + result = self._run(tokens) + self.assertEqual(len(result), 3) + self.assertEqual(result[1].type, "PLUS") + self.assertEqual(str(result[1]), "+") + + def test_newline_before_operator_is_merged(self): + tokens = [ + Token("NAME", "a"), + Token("NL_OR_COMMENT", "\n "), + Token("PLUS", "+"), + Token("NAME", "b"), + ] + result = self._run(tokens) + self.assertEqual(len(result), 3) + self.assertEqual(result[1].type, "PLUS") + self.assertEqual(str(result[1]), "\n +") + + def test_newline_before_non_operator_is_preserved(self): + tokens = [ + Token("NAME", "a"), + Token("NL_OR_COMMENT", "\n"), + Token("NAME", "b"), + ] + result = self._run(tokens) + self.assertEqual(len(result), 3) + self.assertEqual(result[1].type, "NL_OR_COMMENT") + + def test_consecutive_newlines_first_yielded(self): + tokens = [ + Token("NL_OR_COMMENT", "\n"), + Token("NL_OR_COMMENT", "\n "), + Token("PLUS", "+"), + ] + result = self._run(tokens) + self.assertEqual(len(result), 2) + self.assertEqual(result[0].type, "NL_OR_COMMENT") + self.assertEqual(str(result[0]), "\n") + self.assertEqual(result[1].type, "PLUS") + self.assertEqual(str(result[1]), "\n +") + + def test_trailing_newline_is_yielded(self): + tokens = [Token("NAME", "a"), Token("NL_OR_COMMENT", "\n")] + result = self._run(tokens) + self.assertEqual(len(result), 2) + self.assertEqual(result[1].type, "NL_OR_COMMENT") + + def test_all_operator_types_are_merged(self): + for op_type in 
sorted(OPERATOR_TYPES): + with self.subTest(op_type=op_type): + tokens = [ + Token("NL_OR_COMMENT", "\n"), + Token(op_type, "x"), + ] + result = self._run(tokens) + self.assertEqual(len(result), 1) + self.assertEqual(result[0].type, op_type) + self.assertTrue(str(result[0]).startswith("\n")) + + def test_minus_not_in_operator_types(self): + self.assertNotIn("MINUS", OPERATOR_TYPES) + + +class TestMultilineOperatorParsing(TestCase): + """Test that HCL2 snippets with multiline operators parse correctly.""" + + def test_multiline_ternary(self): + hcl = 'x = (\n a\n ? "yes"\n : "no"\n)\n' + result = loads(hcl) + self.assertEqual(result["x"], '${(a ? "yes" : "no")}') + + def test_multiline_logical_or(self): + hcl = "x = (\n a\n || b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a || b)}") + + def test_multiline_logical_and(self): + hcl = "x = (\n a\n && b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a && b)}") + + def test_multiline_equality(self): + hcl = "x = (\n a\n == b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a == b)}") + + def test_multiline_not_equal(self): + hcl = "x = (\n a\n != b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a != b)}") + + def test_multiline_comparison(self): + hcl = "x = (\n a\n >= b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a >= b)}") + + def test_multiline_addition(self): + hcl = "x = (\n a\n + b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a + b)}") + + def test_multiline_multiplication(self): + hcl = "x = (\n a\n * b\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a * b)}") + + def test_multiline_chained_operators(self): + hcl = "x = (\n a\n && b\n && c\n)\n" + result = loads(hcl) + self.assertEqual(result["x"], "${(a && b && c)}") + + def test_multiline_nested_ternary(self): + hcl = 'x = (\n a\n ? b\n : c == "d"\n ? "e"\n : f\n)\n' + result = loads(hcl) + self.assertEqual(result["x"], '${(a ? 
b : c == "d" ? "e" : f)}')
+
+    def test_minus_on_new_line_is_separate_attribute(self):
+        """MINUS is excluded from merging — newline before - starts a new statement."""
+        hcl = "a = 1\nb = -2\n"
+        result = loads(hcl)
+        self.assertEqual(result["a"], 1)
+        self.assertIn("b", result)
+
+    def test_single_line_operators_still_work(self):
+        hcl = "x = a + b\n"
+        result = loads(hcl)
+        self.assertEqual(result["x"], "${a + b}")

From 83c58a3ca886e3a576b2b20871df0a6a85c2d60c Mon Sep 17 00:00:00 2001
From: Kamil Kozik
Date: Wed, 11 Mar 2026 11:53:07 +0100
Subject: [PATCH 4/4] CLAUDE.md - add pre-commit section

---
 CLAUDE.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/CLAUDE.md b/CLAUDE.md
index 833629f0..6823678a 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -131,6 +131,14 @@ Use concrete stubs when testing ABCs (e.g., `StubExpression(ExpressionRule)`).

 Always run round-trip full test suite after any modification.

+## Pre-commit Checks
+
+Hooks are defined in `.pre-commit-config.yaml` (includes black, mypy, pylint, and others). All changed files must pass these checks before committing. When writing or modifying code:
+
+- Format Python with **black** (Python 3.8 target).
+- Ensure **mypy** and **pylint** pass. Pylint config is in `pylintrc`, scoped to `hcl2/` and `test/`.
+- End files with a newline; strip trailing whitespace (except under `test/integration/(hcl2_reconstructed|serialized)/`).
+
 ## Keeping Docs Current

 Update this file when architecture, modules, API surface, or testing conventions change. Also update `README.md` and `docs/usage.md` when changes affect the public API, CLI flags, or option fields.