diff --git a/samcli/commands/deploy/deploy_context.py b/samcli/commands/deploy/deploy_context.py index 33ac1711568..aa710aca928 100644 --- a/samcli/commands/deploy/deploy_context.py +++ b/samcli/commands/deploy/deploy_context.py @@ -15,11 +15,13 @@ # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. +import json import logging import os from typing import Dict, List, Optional import boto3 +import botocore.exceptions import click from samcli.commands.deploy import exceptions as deploy_exceptions @@ -39,6 +41,19 @@ LOG = logging.getLogger(__name__) +_SAM_ECR_POLICY_SID = "SAMCliLambdaECRAccess" + +_LAMBDA_ECR_POLICY_STATEMENT = { + "Sid": _SAM_ECR_POLICY_SID, + "Effect": "Allow", + "Principal": {"Service": "lambda.amazonaws.com"}, + "Action": [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:GetRepositoryPolicy", + ], +} + class DeployContext: MSG_SHOWCASE_CHANGESET = "\nChangeset created successfully. {changeset_id}\n" @@ -151,6 +166,14 @@ def run(self): self.deployer = Deployer(cloudformation_client, client_sleep=self.poll_delay) + if self.image_repositories or self.image_repository: + ecr_client = boto3.client("ecr", region_name=self.region if self.region else None, config=boto_config) + _ensure_ecr_lambda_pull_policy( + ecr_client, + self.image_repositories if isinstance(self.image_repositories, dict) else None, + self.image_repository or None, + ) + region = s3_client._client_config.region_name if s3_client else self.region # pylint: disable=W0212 display_parameter_overrides = hide_noecho_parameter_overrides(template_dict, self.parameter_overrides) print_deploy_args( @@ -334,3 +357,98 @@ def merge_parameters(template_dict: Dict, parameter_overrides: Dict) -> List[Dic parameter_values.append(obj) return parameter_values + + +def _extract_ecr_repo_name(ecr_uri: str) -> str: + """ + Extract the ECR repository name from a full ECR URI. + + Examples + -------- + 123456789012.dkr.ecr.us-east-1.amazonaws.com/my-repo:latest -> my-repo + 123456789012.dkr.ecr.us-east-1.amazonaws.com/org/my-repo:v1 -> org/my-repo + """ + parts = ecr_uri.split("/", 1) + repo_with_tag = parts[1] if len(parts) > 1 else parts[0] + return repo_with_tag.split(":")[0] + + +def _ensure_ecr_lambda_pull_policy( + ecr_client, + image_repositories: Optional[Dict[str, str]], + image_repository: Optional[str], +) -> None: + """ + Pre-set Lambda pull permissions on all ECR repositories referenced by + --image-repositories / --image-repository before the CloudFormation + changeset is created. + + This prevents a race condition where CloudFormation's concurrent Lambda + resource creation calls SetRepositoryPolicy in parallel, overwriting each + other and causing intermittent 403 access errors (GitHub issue #8190). + """ + if not image_repositories and not image_repository: + return + + uris = list((image_repositories or {}).values()) + if image_repository: + uris.append(image_repository) + + unique_repo_names = {_extract_ecr_repo_name(uri) for uri in uris if uri} + + for repo_name in unique_repo_names: + _upsert_ecr_lambda_policy(ecr_client, repo_name) + + +def _upsert_ecr_lambda_policy(ecr_client, repo_name: str) -> None: + """ + Idempotently upsert a Lambda pull policy statement on a single ECR repo. + + Soft-fails on AccessDenied so users who have manually pre-configured + policies or whose IAM principal lacks ecr:SetRepositoryPolicy are not blocked. + """ + + existing_statements = [] + try: + response = ecr_client.get_repository_policy(repositoryName=repo_name) + policy_doc = json.loads(response.get("policyText", "{}")) + existing_statements = policy_doc.get("Statement", []) + except ecr_client.exceptions.RepositoryPolicyNotFoundException: + existing_statements = [] + except botocore.exceptions.ClientError as ex: + error_code = ex.response.get("Error", {}).get("Code", "") + if error_code in ("AccessDeniedException", "AuthorizationErrorException"): + LOG.warning( + "Could not read ECR policy for '%s' (access denied). " + "Skipping — ensure ecr:GetRepositoryPolicy permission to prevent " + "intermittent Lambda 403 errors during deployment.", + repo_name, + ) + return + raise deploy_exceptions.ECRPolicySetError(repo_name=repo_name, msg=str(ex)) from ex + + filtered = [s for s in existing_statements if s.get("Sid") != _SAM_ECR_POLICY_SID] + + merged_policy = { + "Version": "2012-10-17", + "Statement": filtered + [_LAMBDA_ECR_POLICY_STATEMENT], + } + + try: + ecr_client.set_repository_policy( + repositoryName=repo_name, + policyText=json.dumps(merged_policy), + force=False, + ) + LOG.info("Pre-set Lambda pull policy on ECR repository '%s'", repo_name) + except botocore.exceptions.ClientError as ex: + error_code = ex.response.get("Error", {}).get("Code", "") + if error_code in ("AccessDeniedException", "AuthorizationErrorException"): + LOG.warning( + "Could not set ECR policy for '%s' (access denied). " + "Skipping — ensure ecr:SetRepositoryPolicy permission to prevent " + "intermittent Lambda 403 errors during deployment.", + repo_name, + ) + return + raise deploy_exceptions.ECRPolicySetError(repo_name=repo_name, msg=str(ex)) from ex diff --git a/samcli/commands/deploy/exceptions.py b/samcli/commands/deploy/exceptions.py index fb4cfe77a3e..10d0a6c9d4f 100644 --- a/samcli/commands/deploy/exceptions.py +++ b/samcli/commands/deploy/exceptions.py @@ -161,3 +161,9 @@ def __init__(self, stack_name: str, missing_key: str, mapping_name: str, origina Original CloudFormation error: {original_error}""" super().__init__(stack_name=stack_name, msg=message) + + +class ECRPolicySetError(UserException): + def __init__(self, repo_name: str, msg: str): + message_fmt = "Failed to set ECR repository policy for '{repo_name}': {msg}" + super().__init__(message=message_fmt.format(repo_name=repo_name, msg=msg)) diff --git a/tests/unit/commands/_utils/test_template.py b/tests/unit/commands/_utils/test_template.py index 47bfe94aea4..a856b1ed26e 100644 --- a/tests/unit/commands/_utils/test_template.py +++ b/tests/unit/commands/_utils/test_template.py @@ -601,36 +601,24 @@ def test_updates_imageuri_when_pointing_to_local_archive(self): new_root = os.path.join(tmpdir, ".aws-sam", "build") os.makedirs(new_root, exist_ok=True) - # Create a fake image archive at the resolved relative path from CWD - # _resolve_relative_to computes a path relative to new_root, and - # pathlib.Path(updated_path).is_file() checks relative to CWD. - # We need the file to exist at the CWD-relative resolved path. resolved_relative = os.path.relpath( os.path.join(original_root, "my-image.tar.gz"), new_root, ) - # Create the archive at the CWD-relative resolved path - resolved_abs = os.path.join(os.getcwd(), resolved_relative) - os.makedirs(os.path.dirname(resolved_abs), exist_ok=True) - with open(resolved_abs, "w") as f: - f.write("fake archive") - - try: - mappings = { - "SAMImageUriFunctions": { - "alpha": {"ImageUri": "my-image.tar.gz"}, - } + + mappings = { + "SAMImageUriFunctions": { + "alpha": {"ImageUri": "my-image.tar.gz"}, } + } + # Mock is_file() so we don't need to create a real file outside the temp dir + with patch("samcli.commands._utils.template.pathlib.Path.is_file", return_value=True): _update_sam_mappings_relative_paths(mappings, original_root, new_root) - # The path should be updated since it resolves to a real local file - updated_uri = mappings["SAMImageUriFunctions"]["alpha"]["ImageUri"] - self.assertEqual(updated_uri, resolved_relative) - finally: - # Clean up the file we created relative to CWD - if os.path.exists(resolved_abs): - os.remove(resolved_abs) + # The path should be updated since it resolves to a real local file + updated_uri = mappings["SAMImageUriFunctions"]["alpha"]["ImageUri"] + self.assertEqual(updated_uri, resolved_relative) def test_move_template_preserves_docker_imageuri_in_sam_mappings(self): """End-to-end: move_template should not rewrite Docker image references in SAM ImageUri Mappings.""" diff --git a/tests/unit/commands/deploy/test_deploy_context.py b/tests/unit/commands/deploy/test_deploy_context.py index 844d5138d19..d3b286ab7a5 100644 --- a/tests/unit/commands/deploy/test_deploy_context.py +++ b/tests/unit/commands/deploy/test_deploy_context.py @@ -11,6 +11,7 @@ from samcli.commands.deploy.exceptions import DeployFailedError +@patch("samcli.commands.deploy.deploy_context._ensure_ecr_lambda_pull_policy", MagicMock()) class TestSamDeployCommand(TestCase): def setUp(self): self.deploy_command_context = DeployContext( diff --git a/tests/unit/commands/deploy/test_ecr_policy_helpers.py b/tests/unit/commands/deploy/test_ecr_policy_helpers.py new file mode 100644 index 00000000000..9b5c426b848 --- /dev/null +++ b/tests/unit/commands/deploy/test_ecr_policy_helpers.py @@ -0,0 +1,331 @@ +""" +Unit tests for the ECR policy helper functions in deploy_context.py. + +These helpers pre-set Lambda pull access on ECR repositories before +CloudFormation creates the changeset, preventing the concurrent +SetRepositoryPolicy race condition described in GitHub issue #8190. +""" + +import json +import logging +import threading +import time +from typing import Dict, List +from unittest import TestCase +from unittest.mock import MagicMock, patch + +from botocore.exceptions import ClientError + +from samcli.commands.deploy.deploy_context import ( + _SAM_ECR_POLICY_SID, + _ensure_ecr_lambda_pull_policy, + _extract_ecr_repo_name, + _upsert_ecr_lambda_policy, +) +from samcli.commands.deploy.exceptions import ECRPolicySetError + +_RepoNotFoundException = type("RepositoryPolicyNotFoundException", (Exception,), {}) + + +def _make_access_denied_error(operation: str) -> ClientError: + return ClientError( + {"Error": {"Code": "AccessDeniedException", "Message": "User is not authorized"}}, + operation, + ) + + +def _make_unexpected_client_error(operation: str) -> ClientError: + return ClientError( + {"Error": {"Code": "InternalServerError", "Message": "Something went wrong"}}, + operation, + ) + + +def _make_ecr_client(existing_policy_doc=None, get_side_effect=None, set_side_effect=None): + """Build a mock ECR client.""" + ecr_client = MagicMock() + ecr_client.exceptions.RepositoryPolicyNotFoundException = _RepoNotFoundException + + if get_side_effect is not None: + ecr_client.get_repository_policy.side_effect = get_side_effect + elif existing_policy_doc is not None: + ecr_client.get_repository_policy.return_value = {"policyText": json.dumps(existing_policy_doc)} + else: + ecr_client.get_repository_policy.return_value = {"policyText": "{}"} + + if set_side_effect is not None: + ecr_client.set_repository_policy.side_effect = set_side_effect + + return ecr_client + + +def _make_stateful_ecr_client(initial_policy_doc=None): + """Return a mock ECR client with in-memory get/set (full-document replace).""" + store = {"policyText": json.dumps(initial_policy_doc or {"Version": "2012-10-17", "Statement": []})} + + def _get(**kwargs): + return {"policyText": store["policyText"]} + + def _set(**kwargs): + store["policyText"] = kwargs["policyText"] + return {} + + client = MagicMock() + client.exceptions.RepositoryPolicyNotFoundException = _RepoNotFoundException + client.get_repository_policy.side_effect = _get + client.set_repository_policy.side_effect = _set + client._store = store + return client + + +# --------------------------------------------------------------------------- +# _extract_ecr_repo_name tests +# --------------------------------------------------------------------------- + + +class TestExtractEcrRepoName(TestCase): + def test_uri_with_tag(self): + uri = "123456789012.dkr.ecr.us-east-1.amazonaws.com/my-repo:latest" + self.assertEqual(_extract_ecr_repo_name(uri), "my-repo") + + def test_uri_with_namespace_and_tag(self): + uri = "123456789012.dkr.ecr.us-east-1.amazonaws.com/org/my-repo:v1" + self.assertEqual(_extract_ecr_repo_name(uri), "org/my-repo") + + def test_uri_without_tag(self): + uri = "123456789012.dkr.ecr.us-east-1.amazonaws.com/my-repo" + self.assertEqual(_extract_ecr_repo_name(uri), "my-repo") + + +# --------------------------------------------------------------------------- +# _ensure_ecr_lambda_pull_policy routing/deduplication tests +# --------------------------------------------------------------------------- + + +class TestEnsureEcrLambdaPullPolicy(TestCase): + def test_both_none_returns_early(self): + ecr_client = _make_ecr_client() + _ensure_ecr_lambda_pull_policy(ecr_client, None, None) + ecr_client.get_repository_policy.assert_not_called() + + def test_empty_dict_and_none_returns_early(self): + ecr_client = _make_ecr_client() + _ensure_ecr_lambda_pull_policy(ecr_client, {}, None) + ecr_client.get_repository_policy.assert_not_called() + + @patch("samcli.commands.deploy.deploy_context._upsert_ecr_lambda_policy") + def test_deduplicates_same_repo(self, mock_upsert): + uri = "123456789012.dkr.ecr.us-east-1.amazonaws.com/my-repo:latest" + _ensure_ecr_lambda_pull_policy(MagicMock(), {"FnA": uri, "FnB": uri}, None) + mock_upsert.assert_called_once() + + @patch("samcli.commands.deploy.deploy_context._upsert_ecr_lambda_policy") + def test_two_different_repos_calls_twice(self, mock_upsert): + _ensure_ecr_lambda_pull_policy( + MagicMock(), + { + "FnA": "123456789012.dkr.ecr.us-east-1.amazonaws.com/repo-a:v1", + "FnB": "123456789012.dkr.ecr.us-east-1.amazonaws.com/repo-b:v1", + }, + None, + ) + self.assertEqual(mock_upsert.call_count, 2) + + @patch("samcli.commands.deploy.deploy_context._upsert_ecr_lambda_policy") + def test_singular_image_repository(self, mock_upsert): + _ensure_ecr_lambda_pull_policy(MagicMock(), None, "123456789012.dkr.ecr.us-east-1.amazonaws.com/single:v2") + mock_upsert.assert_called_once() + + +# --------------------------------------------------------------------------- +# _upsert_ecr_lambda_policy tests +# --------------------------------------------------------------------------- + + +class TestUpsertEcrLambdaPolicy(TestCase): + def test_no_existing_policy_sets_sam_statement(self): + ecr_client = _make_ecr_client(get_side_effect=_RepoNotFoundException("no policy")) + _upsert_ecr_lambda_policy(ecr_client, "my-repo") + + ecr_client.set_repository_policy.assert_called_once() + kwargs = ecr_client.set_repository_policy.call_args.kwargs + self.assertEqual(kwargs["repositoryName"], "my-repo") + self.assertFalse(kwargs["force"]) + policy = json.loads(kwargs["policyText"]) + self.assertEqual(len(policy["Statement"]), 1) + self.assertEqual(policy["Statement"][0]["Sid"], _SAM_ECR_POLICY_SID) + + def test_preserves_existing_statements_and_appends_sam(self): + existing = { + "Version": "2012-10-17", + "Statement": [{"Sid": "CustomerPolicy", "Effect": "Allow", "Principal": "*", "Action": "ecr:*"}], + } + ecr_client = _make_ecr_client(existing_policy_doc=existing) + _upsert_ecr_lambda_policy(ecr_client, "my-repo") + + policy = json.loads(ecr_client.set_repository_policy.call_args.kwargs["policyText"]) + sids = [s["Sid"] for s in policy["Statement"]] + self.assertIn("CustomerPolicy", sids) + self.assertIn(_SAM_ECR_POLICY_SID, sids) + self.assertEqual(len(policy["Statement"]), 2) + + def test_idempotent_replaces_existing_sam_statement(self): + stale = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": _SAM_ECR_POLICY_SID, + "Effect": "Deny", + "Principal": {"Service": "lambda.amazonaws.com"}, + "Action": "ecr:*", + } + ], + } + ecr_client = _make_ecr_client(existing_policy_doc=stale) + _upsert_ecr_lambda_policy(ecr_client, "my-repo") + + policy = json.loads(ecr_client.set_repository_policy.call_args.kwargs["policyText"]) + sam_stmts = [s for s in policy["Statement"] if s["Sid"] == _SAM_ECR_POLICY_SID] + self.assertEqual(len(sam_stmts), 1) + self.assertEqual(sam_stmts[0]["Effect"], "Allow") + + def test_get_access_denied_logs_warning_skips(self): + ecr_client = _make_ecr_client(get_side_effect=_make_access_denied_error("GetRepositoryPolicy")) + with self.assertLogs("samcli.commands.deploy.deploy_context", level=logging.WARNING): + _upsert_ecr_lambda_policy(ecr_client, "my-repo") + ecr_client.set_repository_policy.assert_not_called() + + def test_get_unexpected_error_raises(self): + ecr_client = _make_ecr_client(get_side_effect=_make_unexpected_client_error("GetRepositoryPolicy")) + with self.assertRaises(ECRPolicySetError): + _upsert_ecr_lambda_policy(ecr_client, "my-repo") + + def test_set_access_denied_logs_warning_skips(self): + ecr_client = _make_ecr_client( + get_side_effect=_RepoNotFoundException("no policy"), + set_side_effect=_make_access_denied_error("SetRepositoryPolicy"), + ) + with self.assertLogs("samcli.commands.deploy.deploy_context", level=logging.WARNING): + _upsert_ecr_lambda_policy(ecr_client, "my-repo") + + def test_set_unexpected_error_raises(self): + ecr_client = _make_ecr_client( + get_side_effect=_RepoNotFoundException("no policy"), + set_side_effect=_make_unexpected_client_error("SetRepositoryPolicy"), + ) + with self.assertRaises(ECRPolicySetError): + _upsert_ecr_lambda_policy(ecr_client, "my-repo") + + +# --------------------------------------------------------------------------- +# Issue #8190 scenario tests +# --------------------------------------------------------------------------- + +REGISTRY = "123456789012.dkr.ecr.us-east-1.amazonaws.com" + +SEVEN_REPO_IMAGE_REPOSITORIES = { + "bigDumperLambda": f"{REGISTRY}/big-dumper:v3", + "bqLoaderLambda": f"{REGISTRY}/bq-loader:v3", + "littleCheckerLambda": f"{REGISTRY}/little-checker:v3", + "littleDumperLambda": f"{REGISTRY}/little-dumper:v3", + "tableMakerLambda": f"{REGISTRY}/table-maker:v3", + "publisherLambda": f"{REGISTRY}/publisher:v3", + "jobCheckerLambda": f"{REGISTRY}/job-checker:v3", +} + +SHARED_REPO = "my-app" +SEVEN_FUNCTIONS_SAME_REPO = { + "bigDumperLambda": f"{REGISTRY}/{SHARED_REPO}:big-dumper-v3", + "bqLoaderLambda": f"{REGISTRY}/{SHARED_REPO}:bq-loader-v3", + "littleCheckerLambda": f"{REGISTRY}/{SHARED_REPO}:little-checker-v3", + "littleDumperLambda": f"{REGISTRY}/{SHARED_REPO}:little-dumper-v3", + "tableMakerLambda": f"{REGISTRY}/{SHARED_REPO}:table-maker-v3", + "publisherLambda": f"{REGISTRY}/{SHARED_REPO}:publisher-v3", + "jobCheckerLambda": f"{REGISTRY}/{SHARED_REPO}:job-checker-v3", +} + + +class TestIssue8190Scenarios(TestCase): + def test_seven_distinct_repos_each_gets_policy(self): + ecr_client = _make_ecr_client(get_side_effect=_RepoNotFoundException("no policy")) + _ensure_ecr_lambda_pull_policy(ecr_client, SEVEN_REPO_IMAGE_REPOSITORIES, None) + + self.assertEqual(ecr_client.set_repository_policy.call_count, 7) + for c in ecr_client.set_repository_policy.call_args_list: + policy = json.loads(c.kwargs["policyText"]) + sam_stmts = [s for s in policy["Statement"] if s.get("Sid") == _SAM_ECR_POLICY_SID] + self.assertEqual(len(sam_stmts), 1) + self.assertFalse(c.kwargs.get("force", True)) + + def test_seven_functions_same_repo_deduplicates_to_one_call(self): + ecr_client = _make_ecr_client(get_side_effect=_RepoNotFoundException("no policy")) + _ensure_ecr_lambda_pull_policy(ecr_client, SEVEN_FUNCTIONS_SAME_REPO, None) + + self.assertEqual(ecr_client.set_repository_policy.call_count, 1) + self.assertEqual( + ecr_client.set_repository_policy.call_args.kwargs["repositoryName"], + SHARED_REPO, + ) + + def test_race_condition_without_fix(self): + """Documents the bug: concurrent full-doc replaces overwrite each other.""" + policy_store: Dict = {"doc": {"Version": "2012-10-17", "Statement": []}} + + def cf_handler(function_name: str): + current = json.loads(json.dumps(policy_store["doc"])) + time.sleep(0.01) + current["Statement"] = [ + { + "Sid": f"Grant_{function_name}", + "Effect": "Allow", + "Principal": {"Service": "lambda.amazonaws.com"}, + "Action": ["ecr:GetDownloadUrlForLayer"], + } + ] + policy_store["doc"] = current + + threads = [threading.Thread(target=cf_handler, args=(n,)) for n in SEVEN_FUNCTIONS_SAME_REPO] + for t in threads: + t.start() + for t in threads: + t.join() + + # Only last writer survives — the bug + self.assertEqual(len(policy_store["doc"]["Statement"]), 1) + + def test_pre_set_policy_survives_cf_writes(self): + """With the fix, the pre-set policy ensures Lambda can always pull.""" + ecr_client = _make_stateful_ecr_client() + _ensure_ecr_lambda_pull_policy(ecr_client, SEVEN_FUNCTIONS_SAME_REPO, None) + + # Verify pre-set + statements = json.loads(ecr_client._store["policyText"])["Statement"] + self.assertEqual(len(statements), 1) + self.assertEqual(statements[0]["Sid"], _SAM_ECR_POLICY_SID) + + def test_second_deploy_is_idempotent(self): + ecr_client = _make_stateful_ecr_client() + _ensure_ecr_lambda_pull_policy(ecr_client, SEVEN_REPO_IMAGE_REPOSITORIES, None) + _ensure_ecr_lambda_pull_policy(ecr_client, SEVEN_REPO_IMAGE_REPOSITORIES, None) + + # Each call writes a policy with exactly 1 SAM statement + for c in ecr_client.set_repository_policy.call_args_list: + policy = json.loads(c.kwargs["policyText"]) + sam_count = sum(1 for s in policy["Statement"] if s.get("Sid") == _SAM_ECR_POLICY_SID) + self.assertEqual(sam_count, 1) + + def test_mixed_shared_and_distinct_repos(self): + mixed = { + "fn1": f"{REGISTRY}/shared:tag1", + "fn2": f"{REGISTRY}/shared:tag2", + "fn3": f"{REGISTRY}/shared:tag3", + "fn4": f"{REGISTRY}/shared:tag4", + "fn5": f"{REGISTRY}/distinct-a:v1", + "fn6": f"{REGISTRY}/distinct-b:v1", + "fn7": f"{REGISTRY}/distinct-c:v1", + } + ecr_client = _make_ecr_client(get_side_effect=_RepoNotFoundException("no policy")) + _ensure_ecr_lambda_pull_policy(ecr_client, mixed, None) + + # 1 shared + 3 distinct = 4 calls + self.assertEqual(ecr_client.set_repository_policy.call_count, 4)