From 6c7269043456fc88c24849601b0791ecc009e87a Mon Sep 17 00:00:00 2001 From: Harold Sun Date: Fri, 15 May 2026 10:30:27 -0700 Subject: [PATCH] refactor(cfn-lang-ext): extract shared Mapping-name primitives for Fn::ForEach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #9009 (issue #9005). Closes the second class of structural divergence the bot reviews on that PR repeatedly caught: the Mapping name, Fn::FindInMap lookup key, and compound Mapping table key for nested Fn::ForEach were each constructed inline in two places — once in the build- time pipeline (samcli/commands/build/build_context.py) and once in the package-time pipeline (samcli/lib/package/language_extensions_packaging.py). Bot review #2 caught the missing prefix list; bot review #3 caught the collision counter keyed on the dotted path. Both bugs were duplicate string-construction logic drifting between sites. This PR extracts the three computations into a single module: samcli/lib/cfn_language_extensions/foreach_mapping_helpers.py with three pure functions: - compute_mapping_name(leaf, nesting_path, *, has_collision, resource_template_key) - compute_lookup_key(loop_variable, referenced_outer_vars) - compute_compound_mapping_key(outer_values, inner_value) Both pipelines now call these helpers; neither constructs the strings inline. The cross-pipeline equivalence test in tests/unit/lib/cfn_language_extensions/test_foreach_mapping_helpers.py asserts that build's collision-counting input and package's collision- counting input produce byte-identical Mapping names for the same logical inputs — locking in the structural guarantee. Mechanical changes: - samcli/lib/cfn_language_extensions/foreach_mapping_helpers.py (new) — three pure helpers + module docstring + __all__. - tests/unit/lib/cfn_language_extensions/test_foreach_mapping_helpers.py (new) — 19 tests including parameterized construction cases and three cross-pipeline equivalence cases. - samcli/lib/package/language_extensions_packaging.py — _compute_mapping_name body becomes a 3-line delegation; _replace_dynamic_artifact_with_findmap's default mapping_name and inline lookup_key construction become helper calls; _generate_artifact_mappings's inline compound-key construction becomes a helper call. - samcli/commands/build/build_context.py — _update_foreach_artifact_paths's inline mapping_name and lookup_key constructions become helper calls; _collect_nested_mapping_entry's inline compound-key construction becomes a helper call. Collapses two if/else branches in the process. Removes the now-unused sanitize_resource_key_for_mapping import. What this PR does NOT do: - It does not unify orchestration. The build walker stays a recursive walker; the package detect-then-emit pipeline stays a detect-then-emit pipeline. They just compute Mapping identifiers through one shared source of truth. - It does not touch auto-dependency-layer Mapping emission (_collect_foreach_layer_mappings). That stays in build_context.py. - It does not change DynamicArtifactProperty or PACKAGEABLE_RESOURCE_ARTIFACT_PROPERTIES. No detection changes. Verification: - baseline 9191 passed, 25 skipped → after 9211 passed, 25 skipped (+20) - targeted: 2052 passed across cfn_language_extensions, package, build - ruff check, mypy, black --check all clean - end-to-end #9005 repro (sam package against language-extensions/tc-026-nested-application) still rewrites Properties.Location to an https://s3... URL - leaf-collision regression tests from #9009 commit 5e818202f still pass --- samcli/commands/build/build_context.py | 48 ++--- .../foreach_mapping_helpers.py | 84 ++++++++ .../package/language_extensions_packaging.py | 66 +++--- .../test_foreach_mapping_helpers.py | 204 ++++++++++++++++++ 4 files changed, 344 insertions(+), 58 deletions(-) create mode 100644 samcli/lib/cfn_language_extensions/foreach_mapping_helpers.py create mode 100644 tests/unit/lib/cfn_language_extensions/test_foreach_mapping_helpers.py diff --git a/samcli/commands/build/build_context.py b/samcli/commands/build/build_context.py index 1c6355b030..537ae24076 100644 --- a/samcli/commands/build/build_context.py +++ b/samcli/commands/build/build_context.py @@ -38,7 +38,6 @@ from samcli.lib.build.workflow_config import UnsupportedRuntimeException from samcli.lib.cfn_language_extensions.sam_integration import ( contains_loop_variable, - sanitize_resource_key_for_mapping, substitute_loop_variable, ) from samcli.lib.cfn_language_extensions.utils import is_foreach_key @@ -551,6 +550,10 @@ def _update_foreach_artifact_paths( Dict[str, Dict[str, Dict[str, str]]] Generated Mappings section for dynamic artifact properties (empty dict if none) """ + from samcli.lib.cfn_language_extensions.foreach_mapping_helpers import ( + compute_lookup_key, + compute_mapping_name, + ) from samcli.lib.cfn_language_extensions.models import PACKAGEABLE_RESOURCE_ARTIFACT_PROPERTIES from samcli.lib.cfn_language_extensions.sam_integration import resolve_collection from samcli.lib.package.language_extensions_packaging import ( @@ -634,24 +637,23 @@ def _update_foreach_artifact_paths( referenced_outer_vars=referenced_outer_vars, ) if mapping_entries: - mapping_name = f"SAM{leaf_name}{nesting_path}" - # Collision count is keyed on the leaf so dotted paths - # that share a leaf (e.g. "ImageUri" vs "Code.ImageUri") - # get the resource-key suffix and don't overwrite each - # other's Mapping entries. - if dynamic_props_count.get(leaf_name, 0) > 1: - suffix = sanitize_resource_key_for_mapping(resource_template_key) - mapping_name = f"{mapping_name}{suffix}" + # Collision keying on the leaf is what prevents two dotted + # paths that share a leaf (e.g. ``ImageUri`` and + # ``Code.ImageUri``) from silently overwriting each other's + # Mapping entries. ``compute_mapping_name`` is shared with + # the package-time pipeline. + mapping_name = compute_mapping_name( + leaf_name, + nesting_path, + has_collision=dynamic_props_count.get(leaf_name, 0) > 1, + resource_template_key=resource_template_key, + ) generated_mappings[mapping_name] = mapping_entries - lookup_key: Any - if referenced_outer_vars: - # Compound key: join outer + inner variable refs with "-" - ref_parts = [{"Ref": ovar} for ovar, _ in referenced_outer_vars] - ref_parts.append({"Ref": loop_variable}) - lookup_key = {"Fn::Join": ["-", ref_parts]} - else: - lookup_key = {"Ref": loop_variable} + lookup_key = compute_lookup_key( + loop_variable, + [ovar for ovar, _ in referenced_outer_vars], + ) _set_prop_value(properties, prop_name, {"Fn::FindInMap": [mapping_name, lookup_key, leaf_name]}) else: @@ -830,6 +832,7 @@ def _collect_nested_mapping_entry( """ import itertools + from samcli.lib.cfn_language_extensions.foreach_mapping_helpers import compute_compound_mapping_key from samcli.lib.package.language_extensions_packaging import _leaf_prop_name leaf_name = _leaf_prop_name(prop_name) @@ -849,13 +852,10 @@ def _collect_nested_mapping_entry( if artifact_value is None: continue - if compound_outer_vars: - # Build compound key from referenced outer values + inner value - key_parts = [oval for ovar, oval in zip(outer_vars, outer_combo) if ovar in compound_outer_vars] - key_parts.append(coll_value) - mapping_key = "-".join(key_parts) - else: - mapping_key = coll_value + # Outer values participating in the compound key, in iteration order. + # When ``compound_outer_vars`` is empty this collapses to just ``coll_value``. + outer_key_parts = [oval for ovar, oval in zip(outer_vars, outer_combo) if ovar in compound_outer_vars] + mapping_key = compute_compound_mapping_key(outer_key_parts, coll_value) if mapping_key not in mapping_entries: mapping_entries[mapping_key] = {leaf_name: artifact_value} diff --git a/samcli/lib/cfn_language_extensions/foreach_mapping_helpers.py b/samcli/lib/cfn_language_extensions/foreach_mapping_helpers.py new file mode 100644 index 0000000000..e0f45b4897 --- /dev/null +++ b/samcli/lib/cfn_language_extensions/foreach_mapping_helpers.py @@ -0,0 +1,84 @@ +""" +Pure helpers that compute the three identifiers SAM CLI emits when rewriting +dynamic ``Fn::ForEach`` artifact properties: the ``Mappings`` table name, the +second argument of ``Fn::FindInMap`` (the lookup key), and the inner-Mapping +table key for nested ``Fn::ForEach`` value combinations. + +These were previously inline in two near-duplicate places — +``samcli/commands/build/build_context.py::_update_foreach_artifact_paths`` / +``_collect_nested_mapping_entry`` (build-time) and +``samcli/lib/package/language_extensions_packaging.py::_compute_mapping_name`` / +``_replace_dynamic_artifact_with_findmap`` / +``_generate_artifact_mappings`` (package-time). Each round of bot review on +PR #9009 caught a divergence between those copies (collision keying on the +dotted path vs. the leaf, missing prefixes, FindInMap third-arg spelling). + +Co-locating the three computations in one module makes drift between the two +pipelines structurally impossible: there is one source of truth for what +SAM-emitted Mapping identifiers look like. +""" + +from typing import Any, List + +from samcli.lib.cfn_language_extensions.sam_integration import sanitize_resource_key_for_mapping + + +def compute_mapping_name( + leaf: str, + nesting_path: str, + *, + has_collision: bool, + resource_template_key: str, +) -> str: + """Build the ``SAM[]`` Mapping name. + + The base name is ``SAM``. When ``has_collision`` is + True (multiple resources in the same ``Fn::ForEach`` body share the same + ``(nesting_path, leaf)`` pair), a sanitized suffix derived from the + resource logical-ID template is appended to keep Mapping names unique. + + ``leaf`` must be the leaf segment of the property path + (e.g. ``"ScriptLocation"``, not ``"Command.ScriptLocation"``) — Mapping + names must be alphanumeric. + """ + base = f"SAM{leaf}{nesting_path}" + if not has_collision: + return base + return f"{base}{sanitize_resource_key_for_mapping(resource_template_key)}" + + +def compute_lookup_key(loop_variable: str, referenced_outer_vars: List[str]) -> Any: + """Build the second argument of ``Fn::FindInMap``. + + Returns ``{"Ref": loop_variable}`` when no outer ``Fn::ForEach`` loops are + referenced, or ``{"Fn::Join": ["-", [{"Ref": ovar}, ..., {"Ref": loop_variable}]]}`` + otherwise. Order matches the order of ``referenced_outer_vars`` in the + list, with the inner ``loop_variable`` appended at the end. This mirrors + the order used to build the corresponding compound Mapping key in + :func:`compute_compound_mapping_key`. + """ + if not referenced_outer_vars: + return {"Ref": loop_variable} + + ref_parts: List[Any] = [{"Ref": ovar} for ovar in referenced_outer_vars] + ref_parts.append({"Ref": loop_variable}) + return {"Fn::Join": ["-", ref_parts]} + + +def compute_compound_mapping_key(outer_values: List[str], inner_value: str) -> str: + """Build the dash-joined Mapping table key for a nested-``Fn::ForEach`` + value combination. + + Example: ``compute_compound_mapping_key(["Dev", "Api"], "Users")`` → + ``"Dev-Api-Users"``. Outer values come first, in the same order as the + ``referenced_outer_vars`` passed to :func:`compute_lookup_key`, with the + inner-loop value appended at the end. + """ + return "-".join(list(outer_values) + [inner_value]) + + +__all__ = [ + "compute_compound_mapping_key", + "compute_lookup_key", + "compute_mapping_name", +] diff --git a/samcli/lib/package/language_extensions_packaging.py b/samcli/lib/package/language_extensions_packaging.py index 9eb39366ae..230c04e9ff 100644 --- a/samcli/lib/package/language_extensions_packaging.py +++ b/samcli/lib/package/language_extensions_packaging.py @@ -17,6 +17,11 @@ import jmespath from botocore.utils import set_value_from_jmespath +from samcli.lib.cfn_language_extensions.foreach_mapping_helpers import ( + compute_compound_mapping_key, + compute_lookup_key, + compute_mapping_name, +) from samcli.lib.cfn_language_extensions.models import ( PACKAGEABLE_RESOURCE_ARTIFACT_PROPERTIES, DynamicArtifactProperty, @@ -359,34 +364,27 @@ def _compute_mapping_name( collision_groups: Dict[Tuple[str, str], int], ) -> str: """ - Compute a unique Mapping name, adding a resource-key suffix when multiple - resources share the same (loop_name, property_name). - - The suffix is derived from the resource logical ID template (resource_key) - with loop variable placeholders stripped. This is guaranteed unique because - resource keys are unique within a ForEach body. + Compute a unique Mapping name from a DynamicArtifactProperty. - Examples (collision on DefinitionUri in Fn::ForEach::Services): - - resource_key="${Svc}Api" -> SAMDefinitionUriServicesApi - - resource_key="${Svc}StateMachine" -> SAMDefinitionUriServicesStateMachine + Delegates to ``compute_mapping_name`` in + ``samcli/lib/cfn_language_extensions/foreach_mapping_helpers.py`` (the + same helper the build-time pipeline uses), so package-time and build-time + Mapping names cannot diverge. - When there is no collision the base name is returned unchanged for backward - compatibility. + Collision detection is keyed on ``(nesting_path, leaf)`` — dotted paths + that share a leaf (e.g. ``ImageUri`` on Serverless::Function vs. + ``Code.ImageUri`` on Lambda::Function) collide and get a resource-key + suffix to keep their Mapping entries distinct. """ npath = _nesting_path(prop) - # Use the leaf segment for the Mapping name so dotted property paths (e.g. - # "Command.ScriptLocation") yield well-formed alphanumeric Mapping names. leaf = _leaf_prop_name(prop.property_name) - base_name = f"SAM{leaf}{npath}" - # Collision detection must also key on the leaf — two resources with - # different dotted paths but the same leaf (e.g. "ImageUri" vs - # "Code.ImageUri") would otherwise generate identical Mapping names and - # silently overwrite each other. - key = (npath, leaf) - if collision_groups.get(key, 0) <= 1: - return base_name - suffix = _sanitize_resource_key_for_mapping(prop.resource_key) - return f"{base_name}{suffix}" + has_collision = collision_groups.get((npath, leaf), 0) > 1 + return compute_mapping_name( + leaf, + npath, + has_collision=has_collision, + resource_template_key=prop.resource_key, + ) def _generate_artifact_mappings( @@ -439,7 +437,7 @@ def _generate_artifact_mappings( for combo in itertools.product(*outer_collections, prop.collection): outer_values = list(combo[:-1]) inner_value = combo[-1] - compound_key = "-".join(list(outer_values) + [inner_value]) + compound_key = compute_compound_mapping_key(outer_values, inner_value) expanded_resource_key = prop.resource_key for outer_var, outer_val in zip(outer_vars, outer_values): @@ -589,8 +587,15 @@ def _replace_dynamic_artifact_with_findmap( Replace a dynamic artifact property value with Fn::FindInMap reference. """ if mapping_name is None: - # Use the leaf segment so dotted paths produce alphanumeric Mapping names. - mapping_name = f"SAM{_leaf_prop_name(prop.property_name)}{_nesting_path(prop)}" + # Default: the bare base shape (caller didn't pre-compute a collision-aware + # name from a Counter, so we assume no collision). Real callers go through + # ``_compute_mapping_name`` which threads ``collision_groups``. + mapping_name = compute_mapping_name( + _leaf_prop_name(prop.property_name), + _nesting_path(prop), + has_collision=False, + resource_template_key=prop.resource_key, + ) current_scope = resources if prop.outer_loops: @@ -625,20 +630,13 @@ def _replace_dynamic_artifact_with_findmap( LOG.warning("Properties is not a dict for resource %s in %s", prop.resource_key, prop.foreach_key) return False - uses_compound_keys = False referenced_outer_vars: List[str] = [] if prop.outer_loops: for _, outer_var, _ in prop.outer_loops: if contains_loop_variable(prop.property_value, outer_var): - uses_compound_keys = True referenced_outer_vars.append(outer_var) - if uses_compound_keys and referenced_outer_vars: - ref_parts = [{"Ref": ovar} for ovar in referenced_outer_vars] - ref_parts.append({"Ref": prop.loop_variable}) - lookup_key: Any = {"Fn::Join": ["-", ref_parts]} - else: - lookup_key = {"Ref": prop.loop_variable} + lookup_key = compute_lookup_key(prop.loop_variable, referenced_outer_vars) # Address the property at its dotted location (creating intermediate dicts # if needed) and use the leaf segment as the FindInMap third argument so diff --git a/tests/unit/lib/cfn_language_extensions/test_foreach_mapping_helpers.py b/tests/unit/lib/cfn_language_extensions/test_foreach_mapping_helpers.py new file mode 100644 index 0000000000..11d0c15f53 --- /dev/null +++ b/tests/unit/lib/cfn_language_extensions/test_foreach_mapping_helpers.py @@ -0,0 +1,204 @@ +"""Tests for ``samcli.lib.cfn_language_extensions.foreach_mapping_helpers``. + +Verifies the three pure helpers — ``compute_mapping_name``, +``compute_lookup_key``, ``compute_compound_mapping_key`` — and asserts that +the build-time (``samcli/commands/build/build_context.py``) and package-time +(``samcli/lib/package/language_extensions_packaging.py``) call sites produce +the same Mapping name when fed equivalent inputs (cross-pipeline equivalence). +""" + +from collections import Counter +from unittest import TestCase + +from parameterized import parameterized + +from samcli.lib.cfn_language_extensions.exceptions import InvalidTemplateException +from samcli.lib.cfn_language_extensions.foreach_mapping_helpers import ( + compute_compound_mapping_key, + compute_lookup_key, + compute_mapping_name, +) + + +class TestComputeMappingName(TestCase): + @parameterized.expand( + [ + # (leaf, nesting_path, has_collision, resource_template_key, expected) + # No collision: bare base name, resource_template_key is unused. + ("CodeUri", "Services", False, "${Svc}Function", "SAMCodeUriServices"), + ("ContentUri", "Layers", False, "${Name}Layer", "SAMContentUriLayers"), + ("ScriptLocation", "Jobs", False, "${Name}Job", "SAMScriptLocationJobs"), + # Collision: append sanitized resource-template-key suffix. + ("CodeUri", "Services", True, "${Svc}Api", "SAMCodeUriServicesApi"), + ("CodeUri", "Services", True, "${Svc}StateMachine", "SAMCodeUriServicesStateMachine"), + ("ImageUri", "Funcs", True, "${Name}Lambda", "SAMImageUriFuncsLambda"), + # Nested loop (nesting_path is the concatenation of ancestor + current). + ("CodeUri", "EnvsServices", False, "${Env}${Svc}Function", "SAMCodeUriEnvsServices"), + ("DefinitionUri", "EnvsServices", True, "${Env}${Svc}Api", "SAMDefinitionUriEnvsServicesApi"), + ] + ) + def test_construction(self, leaf, nesting_path, has_collision, resource_template_key, expected): + self.assertEqual( + compute_mapping_name( + leaf, + nesting_path, + has_collision=has_collision, + resource_template_key=resource_template_key, + ), + expected, + ) + + def test_resource_template_key_with_no_static_segment_raises(self): + # sanitize_resource_key_for_mapping rejects keys whose static portion is + # empty — propagates as InvalidTemplateException so the caller surfaces a + # useful error instead of silently producing colliding Mapping names. + with self.assertRaises(InvalidTemplateException): + compute_mapping_name( + "CodeUri", + "Services", + has_collision=True, + resource_template_key="${OnlyAVariable}", + ) + + +class TestComputeLookupKey(TestCase): + def test_no_outer_vars_uses_simple_ref(self): + self.assertEqual(compute_lookup_key("Name", []), {"Ref": "Name"}) + + def test_one_outer_var_uses_fn_join(self): + self.assertEqual( + compute_lookup_key("Svc", ["Env"]), + {"Fn::Join": ["-", [{"Ref": "Env"}, {"Ref": "Svc"}]]}, + ) + + def test_multiple_outer_vars_preserves_order(self): + self.assertEqual( + compute_lookup_key("Svc", ["Env", "Region"]), + {"Fn::Join": ["-", [{"Ref": "Env"}, {"Ref": "Region"}, {"Ref": "Svc"}]]}, + ) + + def test_none_outer_vars_falls_back_to_simple_ref(self): + # The build-side caller may pass an empty referenced_outer_vars list; + # treat it the same as an explicit empty list. + self.assertEqual(compute_lookup_key("Name", []), {"Ref": "Name"}) + + +class TestComputeCompoundMappingKey(TestCase): + def test_no_outer_values_returns_inner_only(self): + self.assertEqual(compute_compound_mapping_key([], "Users"), "Users") + + def test_single_outer_value(self): + self.assertEqual(compute_compound_mapping_key(["Dev"], "Users"), "Dev-Users") + + def test_multiple_outer_values_in_order(self): + self.assertEqual(compute_compound_mapping_key(["Dev", "Api"], "Users"), "Dev-Api-Users") + + +class TestCrossPipelineEquivalence(TestCase): + """Lock in: build's call shape and package's call shape produce identical + Mapping names for the same logical inputs. + + Build-time (``_update_foreach_artifact_paths``): + leaf_name = leaf_prop_name(prop_name) + has_collision = dynamic_props_count.get(leaf_name, 0) > 1 + + Package-time (``_compute_mapping_name``): + leaf = leaf_prop_name(prop.property_name) + has_collision = collision_groups.get((nesting_path, leaf), 0) > 1 + + For the same set of (nesting_path, leaf, resource_template_key) inputs, + both code paths must yield byte-identical Mapping names. If a future + refactor changes one helper but forgets the other, this test fails loudly. + """ + + def test_no_collision_paths_match(self): + # A simulated set of dynamic-property events: one (nesting_path, leaf) + # appears once, so neither caller should see a collision. + events = [("Services", "CodeUri", "${Svc}Function")] + + build_count = Counter(leaf for _, leaf, _ in events) + package_groups = Counter((path, leaf) for path, leaf, _ in events) + + for path, leaf, key in events: + build_name = compute_mapping_name( + leaf, + path, + has_collision=build_count.get(leaf, 0) > 1, + resource_template_key=key, + ) + package_name = compute_mapping_name( + leaf, + path, + has_collision=package_groups.get((path, leaf), 0) > 1, + resource_template_key=key, + ) + self.assertEqual(build_name, package_name, f"divergence at {path}/{leaf}") + + def test_collision_within_single_loop_paths_match(self): + # Two resources in the same Fn::ForEach body share a leaf — the + # resource-key suffix should be applied by both pipelines. + events = [ + ("Services", "DefinitionUri", "${Svc}Api"), + ("Services", "DefinitionUri", "${Svc}StateMachine"), + ] + + build_count = Counter(leaf for _, leaf, _ in events) + package_groups = Counter((path, leaf) for path, leaf, _ in events) + + names_via_build = [] + names_via_package = [] + for path, leaf, key in events: + names_via_build.append( + compute_mapping_name( + leaf, + path, + has_collision=build_count.get(leaf, 0) > 1, + resource_template_key=key, + ) + ) + names_via_package.append( + compute_mapping_name( + leaf, + path, + has_collision=package_groups.get((path, leaf), 0) > 1, + resource_template_key=key, + ) + ) + + self.assertEqual(names_via_build, names_via_package) + # Both names exist and are distinct — neither resource silently + # overwrites the other's Mapping entries. + self.assertEqual(len(set(names_via_build)), 2) + + def test_dotted_paths_with_shared_leaf_collide(self): + # AWS::Serverless::Function.ImageUri and AWS::Lambda::Function.Code.ImageUri + # have different dotted paths but the same leaf "ImageUri". The + # collision-detection key on both pipelines is keyed on the leaf, so + # both produce the same Mapping name and get the resource-key suffix. + # (Regression for the bug bot review #3 caught on PR #9009.) + events = [ + ("Funcs", "ImageUri", "${Name}Sam"), + ("Funcs", "ImageUri", "${Name}Lambda"), + ] + + build_count = Counter(leaf for _, leaf, _ in events) + package_groups = Counter((path, leaf) for path, leaf, _ in events) + + names = [] + for path, leaf, key in events: + build_name = compute_mapping_name( + leaf, + path, + has_collision=build_count.get(leaf, 0) > 1, + resource_template_key=key, + ) + package_name = compute_mapping_name( + leaf, + path, + has_collision=package_groups.get((path, leaf), 0) > 1, + resource_template_key=key, + ) + self.assertEqual(build_name, package_name) + names.append(build_name) + + self.assertEqual(names, ["SAMImageUriFuncsSam", "SAMImageUriFuncsLambda"])