From afdb2154d49733b9b8a747b13df7b38fac658876 Mon Sep 17 00:00:00 2001 From: Alexander Amiri Date: Wed, 18 Mar 2026 01:08:16 +0100 Subject: [PATCH 1/3] Construct boundary ARN instead of data source lookup The boundary policy is tagged team=javabin (org default), not shared. Instead of looking it up via iam:GetPolicy (which the cross-team deny blocks), construct the deterministic ARN from the account ID and project. - Remove data source from platform-data module - Use expr:arn:aws:iam::${env:AWS_ACCOUNT_ID}:policy/... in registry - Revert boundary.tf tags override (org default_tags are correct) --- scripts/registry.py | 6 ++++-- terraform/modules/platform-data/main.tf | 6 +++--- terraform/org/boundary.tf | 1 + 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts/registry.py b/scripts/registry.py index 3fa7cff..46d21f9 100644 --- a/scripts/registry.py +++ b/scripts/registry.py @@ -60,7 +60,9 @@ "ecs_cluster_name": "data.aws_ecs_cluster.platform_main.cluster_name", "execution_role_arn": "data.aws_iam_role.platform_ecs_execution.arn", "route53_zone_id": "data.aws_route53_zone.platform_main.zone_id", - "developer_boundary_arn": "data.aws_iam_policy.platform_developer_boundary.arn", + # Boundary ARN constructed from account ID — no data source needed. + # Avoids iam:GetPolicy permission requirement on the boundary policy. + "developer_boundary_arn": "NOT_USED", }, }, @@ -128,7 +130,7 @@ "team": "yaml:team", "region": "env:AWS_REGION", "aws_account_id": "env:AWS_ACCOUNT_ID", - "permissions_boundary_arn": "ref:platform.developer_boundary_arn", + "permissions_boundary_arn": f"expr:arn:aws:iam::${{env:AWS_ACCOUNT_ID}}:policy/{PROJECT}-developer-boundary", "trusted_services": "list:yaml:compute.trusted_service|default:ecs-tasks.amazonaws.com", "additional_policy_jsons": "collect:access_policy_json", }, diff --git a/terraform/modules/platform-data/main.tf b/terraform/modules/platform-data/main.tf index a47003c..7d62ff4 100644 --- a/terraform/modules/platform-data/main.tf +++ b/terraform/modules/platform-data/main.tf @@ -67,6 +67,6 @@ data "aws_route53_zone" "main" { private_zone = false } -data "aws_iam_policy" "developer_boundary" { - name = "${var.project}-developer-boundary" -} + +# Note: the developer boundary ARN is constructed directly by expand-modules.py +# instead of using a data source, to avoid needing iam:GetPolicy permission. diff --git a/terraform/org/boundary.tf b/terraform/org/boundary.tf index 5bca20e..36cbf55 100644 --- a/terraform/org/boundary.tf +++ b/terraform/org/boundary.tf @@ -17,6 +17,7 @@ resource "aws_iam_policy" "developer_boundary" { name = "${var.project}-developer-boundary" description = "Permission boundary for all non-platform roles. Self-replicating: roles with this boundary can only create roles that also carry it." + policy = jsonencode({ Version = "2012-10-17" Statement = [ From 17e2a46fafdafdcb111bb7696f5bd79d653c6aa2 Mon Sep 17 00:00:00 2001 From: Alexander Amiri Date: Wed, 18 Mar 2026 01:31:22 +0100 Subject: [PATCH 2/3] Apply gate: resolve team from GitHub API like broker The gate was using the old per-app role pattern (javabin-ci-app-{repo}). Updated to resolve team via GitHub API and assume javabin-ci-team-{team}, matching the broker's team-based model. - Extract GitHub App auth + team resolution to shared/github.py - Update ci_broker and apply_gate to use shared module - Add SSM read permission for GitHub App credentials to gate role - Switch both Lambda archives to source{} blocks for shared inclusion --- terraform/lambda-src/apply_gate/handler.py | 16 ++- terraform/lambda-src/ci_broker/handler.py | 126 +----------------- terraform/lambda-src/shared/github.py | 143 +++++++++++++++++++++ terraform/platform/lambdas/main.tf | 37 +++++- 4 files changed, 193 insertions(+), 129 deletions(-) create mode 100644 terraform/lambda-src/shared/github.py diff --git a/terraform/lambda-src/apply_gate/handler.py b/terraform/lambda-src/apply_gate/handler.py index cb63054..f929954 100644 --- a/terraform/lambda-src/apply_gate/handler.py +++ b/terraform/lambda-src/apply_gate/handler.py @@ -7,7 +7,7 @@ The signing key lives in SSM. Only this Lambda can read it. CI roles invoke the Lambda but never see the key. Temp credentials are issued via STS -AssumeRole on the app's CI role. +AssumeRole on the team's CI role (resolved from GitHub team membership). """ import hashlib @@ -18,6 +18,7 @@ import time import boto3 +from shared.github import resolve_team logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -204,9 +205,18 @@ def action_status(event): def _issue_credentials(repo_name): - """Assume the app's CI role and return temporary credentials.""" + """Resolve team from GitHub and assume the team's CI role.""" account_id = os.environ.get("ACCOUNT_ID", "") - role_arn = f"arn:aws:iam::{account_id}:role/{PROJECT}-ci-app-{repo_name}" + + team = resolve_team(repo_name) + if not team: + raise RuntimeError( + f"Repo '{repo_name}' is not in any GitHub team. " + f"Add it at https://github.com/orgs/javaBin/teams" + ) + + role_arn = f"arn:aws:iam::{account_id}:role/{PROJECT}-ci-team-{team}" + logger.info("Assuming team role %s for repo %s", role_arn, repo_name) resp = sts.assume_role( RoleArn=role_arn, diff --git a/terraform/lambda-src/ci_broker/handler.py b/terraform/lambda-src/ci_broker/handler.py index 428bcd8..04712da 100644 --- a/terraform/lambda-src/ci_broker/handler.py +++ b/terraform/lambda-src/ci_broker/handler.py @@ -13,26 +13,18 @@ import json import logging import os -import time -import urllib.request import boto3 +from shared.github import resolve_team logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -ssm = boto3.client("ssm") sts = boto3.client("sts") ACCOUNT_ID = os.environ.get("AWS_ACCOUNT_ID", "") PROJECT = os.environ.get("PROJECT", "javabin") GITHUB_ORG = os.environ.get("GITHUB_ORG", "javaBin") -GITHUB_APP_ID_PARAM = os.environ.get("GITHUB_APP_ID_PARAM", "/javabin/platform/github-app-id") -GITHUB_APP_KEY_PARAM = os.environ.get("GITHUB_APP_KEY_PARAM", "/javabin/platform/github-app-key") - -# Cache GitHub App token across invocations (valid for 1 hour) -_token_cache = {"token": None, "expires_at": 0} -_ssm_cache = {} PLAN_DURATION = 3600 # 1 hour for plan DEPLOY_DURATION = 900 # 15 minutes for deploy @@ -42,120 +34,6 @@ "deploy": f"{PROJECT}-ci-deploy-", } -EXCLUDED_TEAMS = {"platform"} - - -def _get_ssm(param_name): - if param_name not in _ssm_cache: - resp = ssm.get_parameter(Name=param_name, WithDecryption=True) - _ssm_cache[param_name] = resp["Parameter"]["Value"] - return _ssm_cache[param_name] - - -def _github_app_token(): - """Generate a GitHub App installation token (cached for 50 minutes).""" - now = time.time() - if _token_cache["token"] and _token_cache["expires_at"] > now: - return _token_cache["token"] - - import subprocess - import tempfile - - app_id = _get_ssm(GITHUB_APP_ID_PARAM) - private_key = _get_ssm(GITHUB_APP_KEY_PARAM) - - # Build JWT (Header.Payload.Signature) - import base64 - import hashlib - import hmac - - header = base64.urlsafe_b64encode(json.dumps( - {"alg": "RS256", "typ": "JWT"}).encode()).rstrip(b"=").decode() - - iat = int(now) - 60 - exp = iat + 600 - payload = base64.urlsafe_b64encode(json.dumps( - {"iss": app_id, "iat": iat, "exp": exp}).encode()).rstrip(b"=").decode() - - signing_input = f"{header}.{payload}" - - # Sign with RS256 using openssl (available in Lambda runtime) - with tempfile.NamedTemporaryFile(mode="w", suffix=".pem", delete=False) as f: - f.write(private_key) - key_file = f.name - - result = subprocess.run( - ["openssl", "dgst", "-sha256", "-sign", key_file], - input=signing_input.encode(), - capture_output=True, - ) - os.unlink(key_file) - - if result.returncode != 0: - raise RuntimeError(f"JWT signing failed: {result.stderr.decode()}") - - signature = base64.urlsafe_b64encode(result.stdout).rstrip(b"=").decode() - jwt_token = f"{signing_input}.{signature}" - - # Exchange JWT for installation token - # First, find the installation ID for the org - req = urllib.request.Request( - f"https://api.github.com/app/installations", - headers={ - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github+json", - }, - ) - with urllib.request.urlopen(req) as resp: - installations = json.loads(resp.read()) - - install_id = None - for inst in installations: - if inst.get("account", {}).get("login") == GITHUB_ORG: - install_id = inst["id"] - break - - if not install_id: - raise RuntimeError(f"No GitHub App installation found for {GITHUB_ORG}") - - req = urllib.request.Request( - f"https://api.github.com/app/installations/{install_id}/access_tokens", - method="POST", - headers={ - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github+json", - }, - ) - with urllib.request.urlopen(req) as resp: - token_resp = json.loads(resp.read()) - - _token_cache["token"] = token_resp["token"] - _token_cache["expires_at"] = now + 3000 # Cache for ~50 minutes - return _token_cache["token"] - - -def _resolve_team(repo_name): - """Resolve which team a repo belongs to via GitHub API.""" - token = _github_app_token() - req = urllib.request.Request( - f"https://api.github.com/repos/{GITHUB_ORG}/{repo_name}/teams", - headers={ - "Authorization": f"token {token}", - "Accept": "application/vnd.github+json", - }, - ) - try: - with urllib.request.urlopen(req) as resp: - teams = json.loads(resp.read()) - except urllib.error.HTTPError as e: - logger.error("GitHub API error for %s: %s", repo_name, e) - return None - - for team in teams: - if team["slug"] not in EXCLUDED_TEAMS: - return team["slug"] - return None - def _assume_role(role_arn, session_name, duration): """Assume an IAM role and return temporary credentials.""" @@ -183,7 +61,7 @@ def handler(event, context): return {"error": f"Invalid action: {action}. Must be plan or deploy", "approved": False} # Resolve team from GitHub - team = _resolve_team(repo) + team = resolve_team(repo) if not team: logger.warning("Repo %s does not belong to any team", repo) return { diff --git a/terraform/lambda-src/shared/github.py b/terraform/lambda-src/shared/github.py new file mode 100644 index 0000000..038be9e --- /dev/null +++ b/terraform/lambda-src/shared/github.py @@ -0,0 +1,143 @@ +"""GitHub App authentication and team resolution. + +Shared by ci_broker and apply_gate Lambdas. Uses the platform GitHub App +to generate installation tokens and resolve repo→team membership. +""" + +import base64 +import json +import logging +import os +import subprocess +import tempfile +import time +import urllib.error +import urllib.request + +import boto3 + +logger = logging.getLogger(__name__) + +ssm = boto3.client("ssm") + +GITHUB_ORG = os.environ.get("GITHUB_ORG", "javaBin") +GITHUB_APP_ID_PARAM = os.environ.get( + "GITHUB_APP_ID_PARAM", "/javabin/platform/github-app-id" +) +GITHUB_APP_KEY_PARAM = os.environ.get( + "GITHUB_APP_KEY_PARAM", "/javabin/platform/github-app-key" +) + +EXCLUDED_TEAMS = {"platform"} + +# Cache across invocations +_token_cache = {"token": None, "expires_at": 0} +_ssm_cache = {} + + +def _get_ssm(param_name): + if param_name not in _ssm_cache: + resp = ssm.get_parameter(Name=param_name, WithDecryption=True) + _ssm_cache[param_name] = resp["Parameter"]["Value"] + return _ssm_cache[param_name] + + +def github_app_token(): + """Generate a GitHub App installation token (cached for 50 minutes).""" + now = time.time() + if _token_cache["token"] and _token_cache["expires_at"] > now: + return _token_cache["token"] + + app_id = _get_ssm(GITHUB_APP_ID_PARAM) + private_key = _get_ssm(GITHUB_APP_KEY_PARAM) + + # Build JWT (Header.Payload.Signature) + header = base64.urlsafe_b64encode(json.dumps( + {"alg": "RS256", "typ": "JWT"}).encode()).rstrip(b"=").decode() + + iat = int(now) - 60 + exp = iat + 600 + payload = base64.urlsafe_b64encode(json.dumps( + {"iss": app_id, "iat": iat, "exp": exp}).encode()).rstrip(b"=").decode() + + signing_input = f"{header}.{payload}" + + # Sign with RS256 using openssl (available in Lambda runtime) + with tempfile.NamedTemporaryFile(mode="w", suffix=".pem", delete=False) as f: + f.write(private_key) + key_file = f.name + + result = subprocess.run( + ["openssl", "dgst", "-sha256", "-sign", key_file], + input=signing_input.encode(), + capture_output=True, + ) + os.unlink(key_file) + + if result.returncode != 0: + raise RuntimeError(f"JWT signing failed: {result.stderr.decode()}") + + signature = base64.urlsafe_b64encode(result.stdout).rstrip(b"=").decode() + jwt_token = f"{signing_input}.{signature}" + + # Exchange JWT for installation token + req = urllib.request.Request( + "https://api.github.com/app/installations", + headers={ + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github+json", + }, + ) + with urllib.request.urlopen(req) as resp: + installations = json.loads(resp.read()) + + install_id = None + for inst in installations: + if inst.get("account", {}).get("login") == GITHUB_ORG: + install_id = inst["id"] + break + + if not install_id: + raise RuntimeError(f"No GitHub App installation found for {GITHUB_ORG}") + + req = urllib.request.Request( + f"https://api.github.com/app/installations/{install_id}/access_tokens", + method="POST", + headers={ + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github+json", + }, + ) + with urllib.request.urlopen(req) as resp: + token_resp = json.loads(resp.read()) + + _token_cache["token"] = token_resp["token"] + _token_cache["expires_at"] = now + 3000 # Cache for ~50 minutes + return _token_cache["token"] + + +def resolve_team(repo_name): + """Resolve which team a repo belongs to via GitHub API. + + Returns the team slug, or None if the repo isn't in any team. + Excludes platform-internal teams (e.g. 'platform'). + """ + token = github_app_token() + req = urllib.request.Request( + f"https://api.github.com/repos/{GITHUB_ORG}/{repo_name}/teams", + headers={ + "Authorization": f"token {token}", + "Accept": "application/vnd.github+json", + }, + ) + try: + with urllib.request.urlopen(req) as resp: + teams = json.loads(resp.read()) + except urllib.error.HTTPError as e: + logger.error("GitHub API error for %s: %s", repo_name, e) + return None + + for team in teams: + if team["slug"] not in EXCLUDED_TEAMS: + return team["slug"] + return None diff --git a/terraform/platform/lambdas/main.tf b/terraform/platform/lambdas/main.tf index 5f295bd..516f380 100644 --- a/terraform/platform/lambdas/main.tf +++ b/terraform/platform/lambdas/main.tf @@ -1266,7 +1266,19 @@ data "archive_file" "apply_gate" { type = "zip" output_path = "${path.module}/builds/apply_gate.zip" output_file_mode = "0644" - source_dir = "${local.lambda_src_path}/apply_gate" + + source { + content = file("${local.lambda_src_path}/apply_gate/handler.py") + filename = "handler.py" + } + source { + content = file("${local.lambda_src_path}/shared/__init__.py") + filename = "shared/__init__.py" + } + source { + content = file("${local.lambda_src_path}/shared/github.py") + filename = "shared/github.py" + } } resource "aws_iam_role" "apply_gate" { @@ -1311,6 +1323,15 @@ resource "aws_iam_role_policy" "apply_gate" { Action = "sts:AssumeRole" Resource = "arn:aws:iam::${var.aws_account_id}:role/${var.project}-ci-team-*" }, + { + Sid = "ReadGitHubAppCredentials" + Effect = "Allow" + Action = "ssm:GetParameter" + Resource = [ + "arn:aws:ssm:${var.region}:${var.aws_account_id}:parameter/${var.project}/platform/github-app-id", + "arn:aws:ssm:${var.region}:${var.aws_account_id}:parameter/${var.project}/platform/github-app-key", + ] + }, ] }) } @@ -1349,7 +1370,19 @@ data "archive_file" "ci_broker" { type = "zip" output_path = "${path.module}/builds/ci_broker.zip" output_file_mode = "0644" - source_dir = "${local.lambda_src_path}/ci_broker" + + source { + content = file("${local.lambda_src_path}/ci_broker/handler.py") + filename = "handler.py" + } + source { + content = file("${local.lambda_src_path}/shared/__init__.py") + filename = "shared/__init__.py" + } + source { + content = file("${local.lambda_src_path}/shared/github.py") + filename = "shared/github.py" + } } resource "aws_iam_role" "ci_broker" { From 469b047022b3f642b91f509c49704d7ff53bff44 Mon Sep 17 00:00:00 2001 From: Alexander Amiri Date: Wed, 18 Mar 2026 01:38:23 +0100 Subject: [PATCH 3/3] Use team-prefixed names for ECR repo and ECS service in CI docker-build: ECR repo is now {team}-{repo_name}, not just {repo_name} ecs-deploy: ECS service is now {team}-{repo_name} Both get the team from the broker output (already resolved via GitHub API). Also moved all context expressions to env vars for injection safety. --- .github/workflows/docker-build.yml | 14 ++++++++++---- .github/workflows/ecs-deploy.yml | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index a552387..c9474a0 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -79,21 +79,27 @@ jobs: - name: Determine image tags id: tags + env: + TEAM: ${{ steps.broker.outputs.team }} + REPO_NAME: ${{ github.event.repository.name }} + REF_NAME: ${{ github.ref_name }} + REF: ${{ github.ref }} + REGISTRY: ${{ steps.ecr.outputs.registry }} run: | - REPO="${{ steps.ecr.outputs.registry }}/${{ github.event.repository.name }}" + REPO="${REGISTRY}/${TEAM}-${REPO_NAME}" SHA_TAG="sha-${GITHUB_SHA::8}" echo "primary_tag=${SHA_TAG}" >> "$GITHUB_OUTPUT" echo "repo=${REPO}" >> "$GITHUB_OUTPUT" TAGS="${REPO}:${SHA_TAG}" - if [ "${{ github.ref_name }}" = "main" ] || [ "${{ github.ref_name }}" = "master" ]; then + if [ "${REF_NAME}" = "main" ] || [ "${REF_NAME}" = "master" ]; then DATE_TAG="main-$(date -u +%Y%m%d-%H%M)" TAGS="${TAGS},${REPO}:${DATE_TAG},${REPO}:latest" fi - if [[ "${{ github.ref }}" == refs/tags/v* ]]; then - TAGS="${TAGS},${REPO}:${{ github.ref_name }}" + if echo "${REF}" | grep -q '^refs/tags/v'; then + TAGS="${TAGS},${REPO}:${REF_NAME}" fi echo "tags=${TAGS}" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/ecs-deploy.yml b/.github/workflows/ecs-deploy.yml index 5d21656..36dc7d2 100644 --- a/.github/workflows/ecs-deploy.yml +++ b/.github/workflows/ecs-deploy.yml @@ -68,6 +68,6 @@ jobs: - name: Deploy to ECS env: - SERVICE: ${{ inputs.service_name || github.event.repository.name }} + SERVICE: ${{ inputs.service_name || format('{0}-{1}', steps.broker.outputs.team, github.event.repository.name) }} CLUSTER: ${{ inputs.cluster_name }} run: sh .platform/scripts/ecs-deploy.sh