From eca45ed4c3931b699a9e8c35bf6a267e3d6aaf3b Mon Sep 17 00:00:00 2001 From: ditahkk Date: Mon, 22 Dec 2025 19:51:17 -0500 Subject: [PATCH] Refactor CloudFormation templates for StackSets deployment - Updated README.md to reflect new structure and deployment order for CloudFormation StackSets. - Introduced new directory structure for PLT and WKL OU deployments. - Added detailed deployment order flowchart and StackSet configuration examples. - Created CloudFormation templates for GitHub OIDC provider and CI/CD runner role in PLT OU. - Implemented CI/CD deploy role and Terraform state backend in WKL OU with automatic onboarding. - Enhanced authentication documentation to clarify cross-account role chaining. - Revised conventions to align with new OU hierarchy and environment definitions. - Added comprehensive IAM policies for deploy roles, including KMS encryption and DynamoDB state locking. --- .github/workflows/README.md | 64 +++ README.md | 88 +++- cloudformation/README.md | 161 ++++-- .../plt/00-oidc-provider-github.yaml | 69 +++ .../stacksets/plt/10-iam-runner-role.yaml | 173 +++++++ .../stacksets/plt/25-cicd-artifacts.yaml | 300 +++++++++++ .../stacksets/wkl/15-iam-deploy-role.yaml | 235 +++++++++ .../wkl/20-terraform-state-backend.yaml | 375 ++++++++++++++ docs/authentication.md | 111 ++-- docs/caching.md | 477 ++++++++++++++++++ docs/conventions.md | 69 ++- docs/pipeline-rules.md | 160 +++++- gitlab-ci/README.md | 71 +++ jenkins/README.md | 106 ++++ 14 files changed, 2318 insertions(+), 141 deletions(-) create mode 100644 cloudformation/stacksets/plt/00-oidc-provider-github.yaml create mode 100644 cloudformation/stacksets/plt/10-iam-runner-role.yaml create mode 100644 cloudformation/stacksets/plt/25-cicd-artifacts.yaml create mode 100644 cloudformation/stacksets/wkl/15-iam-deploy-role.yaml create mode 100644 cloudformation/stacksets/wkl/20-terraform-state-backend.yaml create mode 100644 docs/caching.md diff --git a/.github/workflows/README.md 
b/.github/workflows/README.md index 1da9837..4174e6f 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -38,6 +38,70 @@ Follow the trigger and deployment rules from docs/pipeline-rules.md: - Prod requires change_request input ``` +## Caching Strategy + +Use `actions/cache@v4` to dramatically reduce pipeline time: + +### Terraform Cache + +```yaml +- name: Cache Terraform providers + uses: actions/cache@v4 + with: + path: ~/.terraform.d/plugin-cache + key: terraform-${{ runner.os }}-${{ hashFiles('**/.terraform.lock.hcl') }} + restore-keys: terraform-${{ runner.os }}- + +- name: Terraform Init + run: terraform init + env: + TF_PLUGIN_CACHE_DIR: ~/.terraform.d/plugin-cache +``` + +### Ansible Cache + +```yaml +- name: Cache Ansible collections + uses: actions/cache@v4 + with: + path: ~/.ansible/collections + key: ansible-${{ hashFiles('**/requirements.yml') }} + restore-keys: ansible- + +- name: Install collections + run: ansible-galaxy collection install -r requirements.yml + env: + ANSIBLE_COLLECTIONS_PATH: ~/.ansible/collections +``` + +### S3 Backend Cache (Cross-Runner) + +For persistent caching across all runners: + +```yaml +env: + ARTIFACT_BUCKET: ${{ vars.ARTIFACT_BUCKET }} + +steps: + - name: Download cache from S3 + run: | + CACHE_KEY="terraform-$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + aws s3 cp "s3://${ARTIFACT_BUCKET}/${CACHE_KEY}.tar.gz" /tmp/cache.tar.gz || true + if [[ -f /tmp/cache.tar.gz ]]; then + mkdir -p ~/.terraform.d/plugin-cache + tar -xzf /tmp/cache.tar.gz -C ~/.terraform.d/ + fi + + - name: Upload cache to S3 + if: always() + run: | + CACHE_KEY="terraform-$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + tar -czf /tmp/cache.tar.gz -C ~/.terraform.d/ plugin-cache/ 2>/dev/null || true + aws s3 cp /tmp/cache.tar.gz "s3://${ARTIFACT_BUCKET}/${CACHE_KEY}.tar.gz" || true +``` + +See [docs/caching.md](../../docs/caching.md) for complete details. 
+ ## Files to Generate | File | Purpose | diff --git a/README.md b/README.md index f331b04..7aec7d9 100644 --- a/README.md +++ b/README.md @@ -2,33 +2,59 @@ Architecture patterns for IaC (Terraform, Ansible, CloudFormation) pipelines on AWS. -## AWS Account Model +## OU Hierarchy ``` -┌─────────────────────────────────┐ ┌─────────────────────────────────┐ -│ NON-PROD ACCOUNT │ │ PROD ACCOUNT │ -│ │ │ │ -│ 10-dev 20-qat │ │ 40-stg 70-prod 90-dr │ -│ │ │ │ -│ ca-central-1 │ │ ca-central-1 / ca-west-1 (DR) │ -└─────────────────────────────────┘ └─────────────────────────────────┘ +Root +├── PLT OU (Platform) +│ └── PLT-Runner Account +│ +└── WKL OU (Workloads) + ├── WKL-NPD OU (NonProd) + │ ├── SBX Account + │ ├── DEV Account + │ └── QAT Account + │ + └── WKL-PRD OU (Prod) + ├── STG Account + ├── PRD Account + └── DR Account ``` +**OU Code Reference:** + +| Code | Full Name | Purpose | +| ----------- | ----------------- | -------------------------------- | +| **PLT** | Platform | CI/CD runners and shared tooling | +| **WKL** | Workloads | Application environments | +| **WKL-NPD** | Workloads-NonProd | Development and testing | +| **WKL-PRD** | Workloads-Prod | Production and DR | + ## Core Patterns -### 1. Role Chaining +### 1. Cross-Account Role Chaining ```mermaid flowchart LR - A[CI/CD Runner] --> B[cicd-oidc-role
minimal perms] - B --> C[cicd-admin-role
full perms] - C --> D[Deploy] + A[CI/CD Platform] --> B[OIDC Provider] + B --> C[cicd-runner-role
PLT OU] + C --> D[cicd-deploy-role
WKL Accounts] + D --> E[Deploy] ``` -**Why:** OIDC handles auth, admin role handles permissions. See +**Why:** Runner in PLT assumes deploy roles in WKL accounts. See [docs/authentication.md](docs/authentication.md) -### 2. Pipeline Triggers +### 2. Two-StackSet Deployment + +| StackSet | Target OU | Creates | +| ------------------- | --------- | ------------------------------ | +| PLT Runner StackSet | PLT OU | `cicd-runner-role` + OIDC | +| WKL Deploy StackSet | WKL OU | `cicd-deploy-role` per account | + +New accounts added to WKL automatically receive deploy roles. + +### 3. Pipeline Triggers | Event | Runs? | Why | | ------------------- | ------ | ----------------- | @@ -39,20 +65,33 @@ flowchart LR See [docs/pipeline-rules.md](docs/pipeline-rules.md) -### 3. Environment Progression +### 4. Environment Progression ``` -NON-PROD ACCOUNT PROD ACCOUNT -──────────────── ──────────── -10-dev → 20-qat → 40-stg → 70-prod → 90-dr - ↑ - requires CR +WKL-NPD OU WKL-PRD OU +────────── ────────── +05-sbx → 10-dev → 20-qat → 40-stg → 70-prod → 90-dr + ↑ + requires CR ``` See [docs/conventions.md](docs/conventions.md) --- +## StackSet Templates + +``` +cloudformation/stacksets/ +├── plt/ # Deploy to PLT OU +│ ├── 00-oidc-provider-github.yaml +│ └── 10-iam-runner-role.yaml +│ +└── wkl/ # Deploy to WKL OU + ├── 15-iam-deploy-role.yaml + └── 20-terraform-state-backend.yaml +``` + ## Generate Pipelines Use AI with your org context + these docs: @@ -61,8 +100,8 @@ Use AI with your org context + these docs: Generate a [GitHub/GitLab/Jenkins] pipeline for Terraform: - Follow docs/pipeline-rules.md for triggers - Follow docs/authentication.md for role chaining -- 2 AWS accounts: non-prod, prod -- 5 environments: dev, qat (non-prod) | stg, prod, dr (prod account) +- PLT account for runners, WKL accounts for deployments +- 6 environments: sbx, dev, qat (WKL-NPD) | stg, prod, dr (WKL-PRD) - Regions: ca-central-1, ca-west-1 (DR) ``` @@ -77,5 +116,6 @@ Generate a [GitHub/GitLab/Jenkins] 
pipeline for Terraform: ## Docs - [Pipeline Rules](docs/pipeline-rules.md) - triggers, deployment patterns -- [Authentication](docs/authentication.md) - role chaining -- [Conventions](docs/conventions.md) - naming standards +- [Authentication](docs/authentication.md) - cross-account role chaining +- [Conventions](docs/conventions.md) - OU and naming standards +- [Caching](docs/caching.md) - artifact caching for faster pipelines diff --git a/cloudformation/README.md b/cloudformation/README.md index 6d3a1e0..9b646e1 100644 --- a/cloudformation/README.md +++ b/cloudformation/README.md @@ -1,58 +1,149 @@ -# CloudFormation +# CloudFormation StackSets -Placeholder directory for CloudFormation templates. +Foundation resources deployed via StackSets from management account. -## Generate Your CloudFormation +## OU-Targeted Deployment -Use AI to generate CloudFormation specific to your organization. Provide: - -1. **Your org context** - naming conventions, AWS accounts, regions -2. **The patterns from** `docs/conventions.md` -3. **Your specific requirements** - StackSets, nested stacks +``` +cloudformation/stacksets/ +├── plt/ # Deploy to PLT OU +│ ├── 00-oidc-provider-github.yaml # GitHub OIDC provider +│ ├── 10-iam-runner-role.yaml # Runner role (assumes WKL deploy roles) +│ └── 25-cicd-artifacts.yaml # Artifact & cache bucket +│ +└── wkl/ # Deploy to WKL OU + ├── 15-iam-deploy-role.yaml # Deploy role (trusts PLT runner) + └── 20-terraform-state-backend.yaml # State bucket + DynamoDB lock +``` -## StackSets for Foundation +## Deployment Order -CloudFormation StackSets deploy foundation resources across accounts: +```mermaid +flowchart LR + A[1. OIDC Provider
→ PLT] --> B[2. Runner Role
→ PLT] + B --> C[3. Artifact Bucket
→ PLT] + C --> D[4. Deploy Role
→ WKL] + D --> E[5. State Backend
→ WKL] +``` +## StackSet Configuration + +### PLT OU StackSets + +```bash +# 1. Deploy OIDC Provider to PLT OU +aws cloudformation create-stack-set \ + --stack-set-name plt-oidc-provider \ + --template-body file://stacksets/plt/00-oidc-provider-github.yaml \ + --parameters ParameterKey=GitHubOrganization,ParameterValue=YOUR_ORG \ + --permission-model SERVICE_MANAGED \ + --auto-deployment Enabled=true,RetainStacksOnAccountRemoval=false + +aws cloudformation create-stack-instances \ + --stack-set-name plt-oidc-provider \ + --deployment-targets OrganizationalUnitIds=ou-xxxx-plt \ + --regions ca-central-1 + +# 2. Deploy Runner Role to PLT OU +aws cloudformation create-stack-set \ + --stack-set-name plt-runner-role \ + --template-body file://stacksets/plt/10-iam-runner-role.yaml \ + --parameters ParameterKey=GitHubOrganization,ParameterValue=YOUR_ORG \ + --capabilities CAPABILITY_NAMED_IAM \ + --permission-model SERVICE_MANAGED \ + --auto-deployment Enabled=true,RetainStacksOnAccountRemoval=false + +aws cloudformation create-stack-instances \ + --stack-set-name plt-runner-role \ + --deployment-targets OrganizationalUnitIds=ou-xxxx-plt \ + --regions ca-central-1 + +# 3. Deploy Artifact Bucket to PLT OU (cache storage) +aws cloudformation create-stack-set \ + --stack-set-name plt-cicd-artifacts \ + --template-body file://stacksets/plt/25-cicd-artifacts.yaml \ + --parameters ParameterKey=OrganizationPrefix,ParameterValue=YOUR_ORG \ + --permission-model SERVICE_MANAGED \ + --auto-deployment Enabled=true,RetainStacksOnAccountRemoval=false + +aws cloudformation create-stack-instances \ + --stack-set-name plt-cicd-artifacts \ + --deployment-targets OrganizationalUnitIds=ou-xxxx-plt \ + --regions ca-central-1 ``` -stacksets/ -├── 00-oidc-provider-github.yaml # OIDC for GitHub Actions -├── 10-iam-cicd-roles.yaml # cicd-oidc-role, cicd-admin-role -└── 20-terraform-state-backend.yaml # S3 + DynamoDB for TF state + +### WKL OU StackSets + +```bash +# 4. 
Deploy Deploy Role to WKL OU (auto-deploys to WKL-NPD and WKL-PRD) +aws cloudformation create-stack-set \ + --stack-set-name wkl-deploy-role \ + --template-body file://stacksets/wkl/15-iam-deploy-role.yaml \ + --parameters ParameterKey=RunnerAccountId,ParameterValue=PLT_ACCOUNT_ID \ + --capabilities CAPABILITY_NAMED_IAM \ + --permission-model SERVICE_MANAGED \ + --auto-deployment Enabled=true,RetainStacksOnAccountRemoval=false + +aws cloudformation create-stack-instances \ + --stack-set-name wkl-deploy-role \ + --deployment-targets OrganizationalUnitIds=ou-xxxx-wkl \ + --regions ca-central-1 + +# 5. Deploy State Backend to WKL OU +aws cloudformation create-stack-set \ + --stack-set-name wkl-terraform-state \ + --template-body file://stacksets/wkl/20-terraform-state-backend.yaml \ + --parameters \ + ParameterKey=OrganizationPrefix,ParameterValue=YOUR_ORG \ + ParameterKey=RunnerAccountId,ParameterValue=PLT_ACCOUNT_ID \ + --capabilities CAPABILITY_NAMED_IAM \ + --permission-model SERVICE_MANAGED \ + --auto-deployment Enabled=true,RetainStacksOnAccountRemoval=false + +aws cloudformation create-stack-instances \ + --stack-set-name wkl-terraform-state \ + --deployment-targets OrganizationalUnitIds=ou-xxxx-wkl \ + --regions ca-central-1 ca-west-1 ``` -## Example Prompt for AI +## Automatic Onboarding + +With `--auto-deployment Enabled=true`, when new accounts are added to WKL OU: + +1. StackSet automatically deploys `cicd-deploy-role` to the new account +2. StackSet automatically deploys state backend to the new account +3. Trust policy already references PLT runner role ARN +4. 
New account immediately ready for CI/CD deployments +## Role Trust Relationship + +``` +PLT Account WKL Accounts +─────────── ──────────── +cicd-runner-role ──────────────> cicd-deploy-role + │ │ + ├─ OIDC trust (GitHub) ├─ Trusts PLT runner role ARN + └─ Can assume WKL deploy roles └─ Full infra permissions ``` -Generate CloudFormation StackSets for CI/CD foundation with: -- Org: -- AWS accounts: non-prod (111111111111), prod (222222222222) -- Regions: ca-central-1 (primary), ca-west-1 (DR) -- GitHub org: -Include: -1. OIDC provider for GitHub Actions -2. Role chaining: cicd-oidc-role -> cicd-admin-role -3. Terraform state backend (S3 + DynamoDB) +## OU IDs -Follow numbered file naming from docs/conventions.md -Deploy via StackSets from management account +Get your OU IDs: + +```bash +aws organizations list-organizational-units-for-parent \ + --parent-id r-xxxx # root ID ``` +Replace `ou-xxxx-plt` and `ou-xxxx-wkl` with your actual OU IDs. + ## File Naming Convention | Range | Layer | | ----- | ------------------ | | 00-09 | Identity providers | | 10-19 | IAM roles/policies | -| 20-29 | State management | +| 15-19 | Deploy roles | +| 20-29 | State & artifacts | | 30-39 | Network foundation | | 40-49 | Security baseline | - -## Deployment Order - -StackSets should be deployed in numbered order: - -1. `00-*` - OIDC providers (prerequisite for role assumption) -2. `10-*` - IAM roles (depends on OIDC) -3. `20-*` - State backends (can use roles) diff --git a/cloudformation/stacksets/plt/00-oidc-provider-github.yaml b/cloudformation/stacksets/plt/00-oidc-provider-github.yaml new file mode 100644 index 0000000..a18e831 --- /dev/null +++ b/cloudformation/stacksets/plt/00-oidc-provider-github.yaml @@ -0,0 +1,69 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: | + GitHub Actions OIDC Provider - PLT OU StackSet + + Deploy via StackSet targeting PLT OU from management account. + Creates the OIDC identity provider for GitHub Actions in PLT runner account. 
+ +Parameters: + GitHubOrganization: + Type: String + Description: GitHub organization name (e.g., zsoftly) + AllowedPattern: '^[a-zA-Z0-9-]+$' + ConstraintDescription: Must be a valid GitHub organization name + + ThumbprintList: + Type: CommaDelimitedList + Description: GitHub OIDC thumbprints (rarely changes) + Default: '6938fd4d98bab03faadb97b34396831e3780aea1,1c58a3a8518e8759bf075b76b750d4f2df264fcd' + +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: GitHub Configuration + Parameters: + - GitHubOrganization + - ThumbprintList + ParameterLabels: + GitHubOrganization: + default: GitHub Organization + +Resources: + GitHubOIDCProvider: + Type: AWS::IAM::OIDCProvider + Properties: + Url: https://token.actions.githubusercontent.com + ClientIdList: + - sts.amazonaws.com + ThumbprintList: !Ref ThumbprintList + Tags: + - Key: Name + Value: github-actions-oidc + - Key: Purpose + Value: cicd-authentication + - Key: OU + Value: PLT + - Key: ManagedBy + Value: cloudformation-stackset + - Key: Foundation + Value: 'true' + +Outputs: + OIDCProviderArn: + Description: ARN of the GitHub OIDC Provider + Value: !GetAtt GitHubOIDCProvider.Arn + Export: + Name: !Sub '${AWS::StackName}-OIDCProviderArn' + + OIDCProviderUrl: + Description: URL of the OIDC Provider + Value: https://token.actions.githubusercontent.com + Export: + Name: !Sub '${AWS::StackName}-OIDCProviderUrl' + + GitHubOrganization: + Description: Configured GitHub organization + Value: !Ref GitHubOrganization + Export: + Name: !Sub '${AWS::StackName}-GitHubOrganization' diff --git a/cloudformation/stacksets/plt/10-iam-runner-role.yaml b/cloudformation/stacksets/plt/10-iam-runner-role.yaml new file mode 100644 index 0000000..bbb0518 --- /dev/null +++ b/cloudformation/stacksets/plt/10-iam-runner-role.yaml @@ -0,0 +1,173 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: | + CI/CD Runner Role - PLT OU StackSet + + Deploy via StackSet targeting PLT OU from management account. 
+ Creates the runner role that assumes deploy roles in WKL accounts. + + Architecture: + PLT Account: cicd-runner-role (OIDC auth, assume WKL deploy roles) + WKL Accounts: cicd-deploy-role (trusts PLT runner role) + + Role Chaining Flow: + GitHub Actions -> OIDC -> cicd-runner-role (PLT) -> cicd-deploy-role (WKL) -> Deploy + +Parameters: + GitHubOrganization: + Type: String + Description: GitHub organization name + AllowedPattern: '^[a-zA-Z0-9-]+$' + + AllowedRepositories: + Type: String + Description: | + Repository pattern for trust policy. + Examples: my-repo, infra-*, * (all repos) + Default: '*' + + AllowedBranches: + Type: String + Description: | + Branch pattern for deployments. + Examples: main, release-*, refs/heads/main + Default: '*' + +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: GitHub Configuration + Parameters: + - GitHubOrganization + - AllowedRepositories + - AllowedBranches + +Resources: + # ========================================================================== + # RUNNER ROLE - Authenticates via OIDC, assumes WKL deploy roles + # ========================================================================== + CICDRunnerRole: + Type: AWS::IAM::Role + Properties: + RoleName: cicd-runner-role + Description: | + PLT runner role. Authenticates via OIDC. + Only permission: assume cicd-deploy-role in WKL accounts. 
+ MaxSessionDuration: 3600 # 1 hour + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Sid: GitHubActionsOIDC + Effect: Allow + Principal: + Federated: !Sub 'arn:aws:iam::${AWS::AccountId}:oidc-provider/token.actions.githubusercontent.com' + Action: sts:AssumeRoleWithWebIdentity + Condition: + StringEquals: + 'token.actions.githubusercontent.com:aud': sts.amazonaws.com + StringLike: + 'token.actions.githubusercontent.com:sub': !Sub 'repo:${GitHubOrganization}/${AllowedRepositories}:*' + Policies: + - PolicyName: AssumeWKLDeployRoles + PolicyDocument: + Version: '2012-10-17' + Statement: + # Assume deploy roles in any WKL account + - Sid: AssumeDeployRoles + Effect: Allow + Action: sts:AssumeRole + Resource: 'arn:aws:iam::*:role/cicd-deploy-role' + Tags: + - Key: Name + Value: cicd-runner-role + - Key: Purpose + Value: cicd-runner + - Key: OU + Value: PLT + - Key: ManagedBy + Value: cloudformation-stackset + - Key: Foundation + Value: 'true' + + # ========================================================================== + # EC2 RUNNER ROLE - For self-hosted runners in PLT account + # ========================================================================== + CICDRunnerEC2Role: + Type: AWS::IAM::Role + Properties: + RoleName: cicd-runner-ec2-role + Description: | + Instance profile role for self-hosted CI/CD runners in PLT. + Allows assuming cicd-deploy-role in WKL accounts. 
+ MaxSessionDuration: 7200 + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Sid: EC2AssumeRole + Effect: Allow + Principal: + Service: ec2.amazonaws.com + Action: sts:AssumeRole + Policies: + - PolicyName: AssumeWKLDeployRoles + PolicyDocument: + Version: '2012-10-17' + Statement: + - Sid: AssumeDeployRoles + Effect: Allow + Action: sts:AssumeRole + Resource: 'arn:aws:iam::*:role/cicd-deploy-role' + Tags: + - Key: Name + Value: cicd-runner-ec2-role + - Key: Purpose + Value: ec2-runner-profile + - Key: OU + Value: PLT + - Key: ManagedBy + Value: cloudformation-stackset + - Key: Foundation + Value: 'true' + + CICDRunnerInstanceProfile: + Type: AWS::IAM::InstanceProfile + Properties: + InstanceProfileName: cicd-runner-profile + Roles: + - !Ref CICDRunnerEC2Role + +Outputs: + RunnerRoleArn: + Description: ARN of the Runner Role (use this in GitHub Actions) + Value: !GetAtt CICDRunnerRole.Arn + Export: + Name: !Sub '${AWS::StackName}-RunnerRoleArn' + + RunnerEC2RoleArn: + Description: ARN of the EC2 Runner Role + Value: !GetAtt CICDRunnerEC2Role.Arn + Export: + Name: !Sub '${AWS::StackName}-RunnerEC2RoleArn' + + RunnerInstanceProfileArn: + Description: ARN of the EC2 Runner Instance Profile + Value: !GetAtt CICDRunnerInstanceProfile.Arn + Export: + Name: !Sub '${AWS::StackName}-RunnerInstanceProfileArn' + + RoleChainingExample: + Description: Example GitHub Actions configuration + Value: !Sub | + # GitHub Actions workflow example: + - name: Configure AWS credentials via OIDC + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${CICDRunnerRole.Arn} + aws-region: ca-central-1 + + - name: Assume WKL deploy role + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::WKL_ACCOUNT_ID:role/cicd-deploy-role + aws-region: ca-central-1 + role-chaining: true diff --git a/cloudformation/stacksets/plt/25-cicd-artifacts.yaml b/cloudformation/stacksets/plt/25-cicd-artifacts.yaml new file mode 100644 index 
0000000..653fd5a --- /dev/null +++ b/cloudformation/stacksets/plt/25-cicd-artifacts.yaml @@ -0,0 +1,300 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: | + CI/CD Artifact & Cache Bucket - PLT OU StackSet + + Deploy via StackSet targeting PLT OU from management account. + Creates shared artifact bucket for caching dependencies and storing pipeline artifacts. + + Purpose: + - Cache Terraform providers (30-60s -> 2-5s) + - Cache Ansible collections (20-40s -> 1-3s) + - Cache package dependencies (pip, npm, go) + - Store Terraform plan artifacts between jobs + + Security: + - Short TTLs ensure security patches are applied (3-7 days) + - Hash-based cache keys limit blast radius + - Automatic cleanup prevents unbounded growth + +Parameters: + OrganizationPrefix: + Type: String + Description: Organization prefix for naming (e.g., zsoftly, acme) + AllowedPattern: '^[a-z0-9-]+$' + ConstraintDescription: Lowercase alphanumeric and hyphens only + MaxLength: 20 + + ProviderCacheTTL: + Type: Number + Description: Days to retain Terraform provider cache (recommend 7 for security patches) + Default: 7 + MinValue: 1 + MaxValue: 30 + + DependencyCacheTTL: + Type: Number + Description: Days to retain dependency caches (pip, npm, ansible) + Default: 3 + MinValue: 1 + MaxValue: 14 + + PlanArtifactTTL: + Type: Number + Description: Days to retain Terraform plan artifacts + Default: 1 + MinValue: 1 + MaxValue: 7 + + EnableVersioning: + Type: String + Description: Enable versioning for artifact recovery + AllowedValues: + - 'true' + - 'false' + Default: 'false' + +Conditions: + EnableVersioning: !Equals [!Ref EnableVersioning, 'true'] + +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: Naming + Parameters: + - OrganizationPrefix + - Label: + default: Cache Expiration (Security vs Speed) + Parameters: + - ProviderCacheTTL + - DependencyCacheTTL + - PlanArtifactTTL + - Label: + default: Optional Features + Parameters: + - EnableVersioning + 
+Resources: + # ========================================================================== + # Artifact & Cache Bucket + # ========================================================================== + ArtifactBucket: + Type: AWS::S3::Bucket + DeletionPolicy: Delete + UpdateReplacePolicy: Delete + Properties: + BucketName: !Sub '${OrganizationPrefix}-cicd-artifacts-${AWS::AccountId}-${AWS::Region}' + # Encryption + BucketEncryption: + ServerSideEncryptionConfiguration: + - ServerSideEncryptionByDefault: + SSEAlgorithm: AES256 + # Versioning (optional - cache doesn't usually need it) + VersioningConfiguration: + Status: !If [EnableVersioning, Enabled, Suspended] + # Block public access + PublicAccessBlockConfiguration: + BlockPublicAcls: true + BlockPublicPolicy: true + IgnorePublicAcls: true + RestrictPublicBuckets: true + # Object ownership + OwnershipControls: + Rules: + - ObjectOwnership: BucketOwnerEnforced + # Lifecycle rules for cache expiration + LifecycleConfiguration: + Rules: + # Terraform providers - 7 days default + - Id: ExpireTerraformProviders + Status: Enabled + Prefix: terraform-providers/ + ExpirationInDays: !Ref ProviderCacheTTL + + # Terraform plugins - same as providers + - Id: ExpireTerraformPlugins + Status: Enabled + Prefix: terraform-plugins/ + ExpirationInDays: !Ref ProviderCacheTTL + + # Ansible collections - 3 days default + - Id: ExpireAnsibleCollections + Status: Enabled + Prefix: ansible-collections/ + ExpirationInDays: !Ref DependencyCacheTTL + + # Python pip cache - 3 days default + - Id: ExpirePipCache + Status: Enabled + Prefix: pip-cache/ + ExpirationInDays: !Ref DependencyCacheTTL + + # Node modules - 3 days default + - Id: ExpireNodeModules + Status: Enabled + Prefix: node-modules/ + ExpirationInDays: !Ref DependencyCacheTTL + + # Go modules - 7 days (more stable) + - Id: ExpireGoModules + Status: Enabled + Prefix: go-mod/ + ExpirationInDays: !Ref ProviderCacheTTL + + # Generic dependencies + - Id: ExpireDependencies + Status: 
Enabled + Prefix: dependencies/ + ExpirationInDays: !Ref DependencyCacheTTL + + # Container layers - 1 day (security critical) + - Id: ExpireContainerLayers + Status: Enabled + Prefix: docker-layers/ + ExpirationInDays: 1 + + # Terraform plans - 1 day (job-specific) + - Id: ExpireTerraformPlans + Status: Enabled + Prefix: terraform-plans/ + ExpirationInDays: !Ref PlanArtifactTTL + + # Build artifacts - 1 day + - Id: ExpireBuildArtifacts + Status: Enabled + Prefix: build-artifacts/ + ExpirationInDays: !Ref PlanArtifactTTL + + # Clean up incomplete multipart uploads + - Id: AbortIncompleteUploads + Status: Enabled + AbortIncompleteMultipartUpload: + DaysAfterInitiation: 1 + + # Intelligent tiering for long-lived objects + - Id: IntelligentTiering + Status: Enabled + Prefix: archives/ + Transitions: + - StorageClass: INTELLIGENT_TIERING + TransitionInDays: 1 + + # Intelligent tiering configuration + IntelligentTieringConfigurations: + - Id: CacheOptimization + Status: Enabled + Tierings: + - AccessTier: ARCHIVE_ACCESS + Days: 90 + - AccessTier: DEEP_ARCHIVE_ACCESS + Days: 180 + + Tags: + - Key: Name + Value: !Sub '${OrganizationPrefix}-cicd-artifacts' + - Key: Purpose + Value: cicd-artifact-cache + - Key: OU + Value: PLT + - Key: ManagedBy + Value: cloudformation-stackset + - Key: Foundation + Value: 'true' + + # Bucket policy + ArtifactBucketPolicy: + Type: AWS::S3::BucketPolicy + Properties: + Bucket: !Ref ArtifactBucket + PolicyDocument: + Version: '2012-10-17' + Statement: + # Require HTTPS + - Sid: RequireHTTPS + Effect: Deny + Principal: '*' + Action: 's3:*' + Resource: + - !GetAtt ArtifactBucket.Arn + - !Sub '${ArtifactBucket.Arn}/*' + Condition: + Bool: + 'aws:SecureTransport': 'false' + + # Allow runner role full access + - Sid: AllowRunnerRole + Effect: Allow + Principal: + AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:role/cicd-runner-role' + Action: + - s3:GetObject + - s3:PutObject + - s3:DeleteObject + - s3:ListBucket + - s3:GetBucketLocation + Resource: + - 
!GetAtt ArtifactBucket.Arn + - !Sub '${ArtifactBucket.Arn}/*' + + # Allow EC2 runner role + - Sid: AllowEC2RunnerRole + Effect: Allow + Principal: + AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:role/cicd-runner-ec2-role' + Action: + - s3:GetObject + - s3:PutObject + - s3:DeleteObject + - s3:ListBucket + - s3:GetBucketLocation + Resource: + - !GetAtt ArtifactBucket.Arn + - !Sub '${ArtifactBucket.Arn}/*' + +Outputs: + ArtifactBucketName: + Description: Name of the artifact bucket + Value: !Ref ArtifactBucket + Export: + Name: !Sub '${AWS::StackName}-ArtifactBucketName' + + ArtifactBucketArn: + Description: ARN of the artifact bucket + Value: !GetAtt ArtifactBucket.Arn + Export: + Name: !Sub '${AWS::StackName}-ArtifactBucketArn' + + ArtifactBucketDomainName: + Description: Domain name of the artifact bucket + Value: !GetAtt ArtifactBucket.RegionalDomainName + Export: + Name: !Sub '${AWS::StackName}-ArtifactBucketDomainName' + + CacheKeyPrefix: + Description: Example cache key prefixes + Value: | + terraform-providers/{lock-hash}/ + terraform-plugins/{os}-{arch}/ + ansible-collections/{requirements-hash}/ + pip-cache/{requirements-hash}/ + node-modules/{lockfile-hash}/ + go-mod/{go-sum-hash}/ + terraform-plans/{run-id}/ + + UsageExample: + Description: Example usage in CI/CD pipeline + Value: !Sub | + # Set artifact bucket environment variable + export ARTIFACT_BUCKET="${ArtifactBucket}" + + # Download Terraform provider cache + CACHE_KEY="terraform-providers/$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + aws s3 cp "s3://${ArtifactBucket}/$CACHE_KEY.tar.gz" /tmp/cache.tar.gz || true + if [[ -f /tmp/cache.tar.gz ]]; then + mkdir -p ~/.terraform.d/plugin-cache + tar -xzf /tmp/cache.tar.gz -C ~/.terraform.d/ + fi + + # After terraform init, upload cache + tar -czf /tmp/cache.tar.gz -C ~/.terraform.d/ plugin-cache/ + aws s3 cp /tmp/cache.tar.gz "s3://${ArtifactBucket}/$CACHE_KEY.tar.gz" diff --git a/cloudformation/stacksets/wkl/15-iam-deploy-role.yaml 
b/cloudformation/stacksets/wkl/15-iam-deploy-role.yaml new file mode 100644 index 0000000..e7484a8 --- /dev/null +++ b/cloudformation/stacksets/wkl/15-iam-deploy-role.yaml @@ -0,0 +1,235 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: | + CI/CD Deploy Role - WKL OU StackSet + + Deploy via StackSet targeting WKL OU from management account. + Creates deploy role in each WKL account that trusts the PLT runner role. + + Architecture: + PLT Account: cicd-runner-role (assumes this role) + WKL Accounts: cicd-deploy-role (this template) + + Role Chaining Flow: + GitHub Actions -> OIDC -> cicd-runner-role (PLT) -> cicd-deploy-role (WKL) -> Deploy + + Automatic Onboarding: + When new accounts are added to WKL OU, StackSet auto-deploys this role. + +Parameters: + RunnerAccountId: + Type: String + Description: AWS Account ID of the PLT runner account + AllowedPattern: '^\d{12}$' + ConstraintDescription: Must be a 12-digit AWS account ID + +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: Cross-Account Configuration + Parameters: + - RunnerAccountId + +Resources: + # ========================================================================== + # DEPLOY ROLE - Full permissions for CI/CD operations + # ========================================================================== + CICDDeployRole: + Type: AWS::IAM::Role + Properties: + RoleName: cicd-deploy-role + Description: | + WKL deploy role for CI/CD operations (Terraform, Ansible). + Assumed by cicd-runner-role from PLT account. + Has full permissions for infrastructure management. 
+      MaxSessionDuration: 7200  # 2 hours
+      AssumeRolePolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          # Allow PLT runner role to assume this role
+          - Sid: AssumeFromPLTRunner
+            Effect: Allow
+            Principal:
+              AWS: !Sub 'arn:aws:iam::${RunnerAccountId}:role/cicd-runner-role'
+            Action: sts:AssumeRole
+          # Allow PLT EC2 runner role to assume this role
+          - Sid: AssumeFromPLTEC2Runner
+            Effect: Allow
+            Principal:
+              AWS: !Sub 'arn:aws:iam::${RunnerAccountId}:role/cicd-runner-ec2-role'
+            Action: sts:AssumeRole
+      ManagedPolicyArns:
+        - !Ref CICDDeployPolicy
+      Tags:
+        - Key: Name
+          Value: cicd-deploy-role
+        - Key: Purpose
+          Value: infrastructure-deploy
+        - Key: OU
+          Value: WKL
+        - Key: TrustedBy
+          Value: !Sub 'PLT:${RunnerAccountId}'
+        - Key: ManagedBy
+          Value: cloudformation-stackset
+        - Key: Foundation
+          Value: 'true'
+
+  # Managed policy attached to cicd-deploy-role. It grants broad service access
+  # for Terraform/Ansible and then uses explicit Deny statements (which always
+  # win over Allow) to keep the role from dismantling the foundation IAM
+  # resources, including this policy.
+  CICDDeployPolicy:
+    Type: AWS::IAM::ManagedPolicy
+    Properties:
+      ManagedPolicyName: cicd-deploy-policy
+      Description: Full infrastructure permissions for Terraform and Ansible
+      PolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          # Full EC2 access. ec2:* already includes every VPC, subnet, route,
+          # gateway, Elastic IP and security-group action, so a separate VPC
+          # statement would grant nothing extra.
+          - Sid: EC2Full
+            Effect: Allow
+            Action: ec2:*
+            Resource: '*'
+          # Full IAM access (with guardrails)
+          - Sid: IAMFull
+            Effect: Allow
+            Action: iam:*
+            Resource: '*'
+          # Deny modifying foundation resources.
+          # Besides the direct delete/update calls, this also blocks the indirect
+          # ways around the guardrail: re-tagging the role (removing the
+          # Foundation tag would switch this statement off), attaching further
+          # policies, and changing the permissions boundary.
+          - Sid: DenyModifyFoundation
+            Effect: Deny
+            Action:
+              - iam:DeleteRole
+              - iam:DeleteRolePolicy
+              - iam:DetachRolePolicy
+              - iam:UpdateAssumeRolePolicy
+              - iam:PutRolePolicy
+              - iam:AttachRolePolicy
+              - iam:TagRole
+              - iam:UntagRole
+              - iam:PutRolePermissionsBoundary
+              - iam:DeleteRolePermissionsBoundary
+            Resource:
+              - !Sub 'arn:aws:iam::${AWS::AccountId}:role/cicd-*'
+            Condition:
+              StringEquals:
+                'iam:ResourceTag/Foundation': 'true'
+          # Deny rewriting this policy itself; otherwise the role could publish
+          # a new default version that no longer contains the Deny statements.
+          # The ARN is spelled out by name to avoid a self-reference.
+          - Sid: DenyModifyFoundationPolicy
+            Effect: Deny
+            Action:
+              - iam:CreatePolicyVersion
+              - iam:DeletePolicyVersion
+              - iam:SetDefaultPolicyVersion
+              - iam:DeletePolicy
+            Resource:
+              - !Sub 'arn:aws:iam::${AWS::AccountId}:policy/cicd-deploy-policy'
+          # S3 full access
+          - Sid: S3Full
+            Effect: Allow
+            Action: s3:*
+            Resource: '*'
+          # RDS full access
+          - Sid: RDSFull
+            Effect: Allow
+            Action: rds:*
+            Resource: '*'
+          # EKS full access
+          - Sid: EKSFull
+            Effect: Allow
+            Action: eks:*
+            Resource: '*'
+          # Lambda full access
+          - Sid: LambdaFull
+            Effect: Allow
+            Action: lambda:*
+            Resource: '*'
+          # CloudWatch full access
+          - Sid: CloudWatchFull
+            Effect: Allow
+            Action:
+              - cloudwatch:*
+              - logs:*
+            Resource: '*'
+          # Secrets Manager
+          - Sid: SecretsManagerFull
+            Effect: Allow
+            Action: secretsmanager:*
+            Resource: '*'
+          # SSM (for Ansible)
+          - Sid: SSMFull
+            Effect: Allow
+            Action: ssm:*
+            Resource: '*'
+          # DynamoDB (for state locking)
+          - Sid: DynamoDBFull
+            Effect: Allow
+            Action: dynamodb:*
+            Resource: '*'
+          # Route53
+          - Sid: Route53Full
+            Effect: Allow
+            Action:
+              - route53:*
+              - route53domains:*
+            Resource: '*'
+          # ACM
+          - Sid: ACMFull
+            Effect: Allow
+            Action: acm:*
+            Resource: '*'
+          # ELB
+          - Sid: ELBFull
+            Effect: Allow
+            Action: elasticloadbalancing:*
+            Resource: '*'
+          # Auto Scaling
+          - Sid: AutoScalingFull
+            Effect: Allow
+            Action: autoscaling:*
+            Resource: '*'
+          # SNS/SQS
+          - Sid: MessagingFull
+            Effect: Allow
+            Action:
+              - sns:*
+              - sqs:*
+            Resource: '*'
+          # KMS
+          - Sid: KMSFull
+            Effect: Allow
+            Action: kms:*
+            Resource: '*'
+          # CloudFormation (for reading outputs)
+          - Sid: CloudFormationRead
+            Effect: Allow
+            Action:
+              - cloudformation:Describe*
+              - cloudformation:Get*
+              - cloudformation:List*
+            Resource: '*'
+
+Outputs:
+  DeployRoleArn:
+    Description: ARN of the Deploy Role (assumed by PLT runner)
+    Value: !GetAtt CICDDeployRole.Arn
+    Export:
+      Name: !Sub
'${AWS::StackName}-DeployRoleArn'
+
+  TrustedRunnerAccount:
+    Description: PLT account ID that can assume this role
+    Value: !Ref RunnerAccountId
+    Export:
+      Name: !Sub '${AWS::StackName}-TrustedRunnerAccount'
diff --git a/cloudformation/stacksets/wkl/20-terraform-state-backend.yaml b/cloudformation/stacksets/wkl/20-terraform-state-backend.yaml
new file mode 100644
index 0000000..ee32fb8
--- /dev/null
+++ b/cloudformation/stacksets/wkl/20-terraform-state-backend.yaml
@@ -0,0 +1,385 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Description: |
+  Terraform State Backend - WKL OU StackSet
+
+  Deploy via StackSet targeting WKL OU from management account.
+  Creates S3 bucket and DynamoDB table for Terraform state in each WKL account.
+
+  Resources created (per account/region):
+  - S3 bucket with versioning, encryption, and lifecycle
+  - DynamoDB table for state locking with TTL
+  - KMS key for encryption (optional)
+
+  Automatic Onboarding:
+  When new accounts are added to WKL OU, StackSet auto-deploys state backend.
+
+Parameters:
+  OrganizationPrefix:
+    Type: String
+    Description: Organization prefix for naming (e.g., zsoftly, acme)
+    AllowedPattern: '^[a-z0-9-]+$'
+    ConstraintDescription: Lowercase alphanumeric and hyphens only
+    MaxLength: 20
+
+  # NOTE(review): RunnerAccountId is not referenced by any resource, condition
+  # or output in this template — confirm whether it is still needed.
+  RunnerAccountId:
+    Type: String
+    Description: AWS Account ID of the PLT runner account (for cross-account state access)
+    AllowedPattern: '^\d{12}$'
+    ConstraintDescription: Must be a 12-digit AWS account ID
+
+  RetentionDays:
+    Type: Number
+    Description: Days to retain non-current state versions
+    Default: 90
+    MinValue: 30
+    MaxValue: 365
+
+  EnableKMSEncryption:
+    Type: String
+    Description: Use customer-managed KMS key (true) or S3-managed encryption (false)
+    AllowedValues:
+      - 'true'
+      - 'false'
+    Default: 'false'
+
+  LockTableTTLHours:
+    Type: Number
+    Description: Hours before stale locks auto-expire
+    Default: 24
+    MinValue: 1
+    MaxValue: 168
+
+Conditions:
+  UseKMS: !Equals [!Ref EnableKMSEncryption, 'true']
+
+Metadata:
+  AWS::CloudFormation::Interface:
+    ParameterGroups:
+      - Label:
+          default: Naming
+        Parameters:
+          - OrganizationPrefix
+      - Label:
+          default: Cross-Account
+        Parameters:
+          - RunnerAccountId
+      - Label:
+          default: Encryption
+        Parameters:
+          - EnableKMSEncryption
+      - Label:
+          default: Retention & Cleanup
+        Parameters:
+          - RetentionDays
+          - LockTableTTLHours
+
+Resources:
+  # ==========================================================================
+  # KMS Key (Optional)
+  # ==========================================================================
+  StateEncryptionKey:
+    Type: AWS::KMS::Key
+    Condition: UseKMS
+    Properties:
+      Description: !Sub 'Terraform state encryption key - ${OrganizationPrefix}'
+      EnableKeyRotation: true
+      KeyPolicy:
+        Version: '2012-10-17'
+        Statement:
+          - Sid: RootAccountAccess
+            Effect: Allow
+            Principal:
+              AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root'
+            Action: 'kms:*'
+            Resource: '*'
+          - Sid: CICDDeployRoleAccess
+            Effect: Allow
+            Principal:
+              AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:role/cicd-deploy-role'
+            Action:
+              - kms:Encrypt
+              - kms:Decrypt
+              - kms:GenerateDataKey*
+              - kms:DescribeKey
+            Resource: '*'
+      Tags:
+        - Key: Name
+          Value: !Sub '${OrganizationPrefix}-tfstate-key'
+        - Key: Purpose
+          Value: terraform-state-encryption
+        - Key: OU
+          Value: WKL
+        - Key: ManagedBy
+          Value: cloudformation-stackset
+        - Key: Foundation
+          Value: 'true'
+
+  StateEncryptionKeyAlias:
+    Type: AWS::KMS::Alias
+    Condition: UseKMS
+    Properties:
+      AliasName: !Sub 'alias/${OrganizationPrefix}-tfstate'
+      TargetKeyId: !Ref StateEncryptionKey
+
+  # ==========================================================================
+  # S3 Bucket for Terraform State
+  # ==========================================================================
+  TerraformStateBucket:
+    Type: AWS::S3::Bucket
+    DeletionPolicy: Retain
+    UpdateReplacePolicy: Retain
+    Properties:
+      BucketName: !Sub '${OrganizationPrefix}-tfstate-${AWS::AccountId}-${AWS::Region}'
+      BucketEncryption:
+        ServerSideEncryptionConfiguration:
+          - ServerSideEncryptionByDefault:
+              SSEAlgorithm: !If
+                - UseKMS
+                - 'aws:kms'
+                - AES256
+              KMSMasterKeyID: !If
+                - UseKMS
+                - !GetAtt StateEncryptionKey.Arn
+                - !Ref AWS::NoValue
+            BucketKeyEnabled: !If [UseKMS, true, !Ref AWS::NoValue]
+      VersioningConfiguration:
+        Status: Enabled
+      PublicAccessBlockConfiguration:
+        BlockPublicAcls: true
+        BlockPublicPolicy: true
+        IgnorePublicAcls: true
+        RestrictPublicBuckets: true
+      OwnershipControls:
+        Rules:
+          - ObjectOwnership: BucketOwnerEnforced
+      LifecycleConfiguration:
+        Rules:
+          - Id: ExpireOldVersions
+            Status: Enabled
+            NoncurrentVersionExpiration:
+              NoncurrentDays: !Ref RetentionDays
+          - Id: AbortIncompleteMultipartUploads
+            Status: Enabled
+            AbortIncompleteMultipartUpload:
+              DaysAfterInitiation: 7
+          - Id: TransitionToIntelligentTiering
+            Status: Enabled
+            Transitions:
+              - StorageClass: INTELLIGENT_TIERING
+                TransitionInDays: 30
+      LoggingConfiguration:
+        DestinationBucketName: !Ref AccessLogsBucket
+        LogFilePrefix: !Sub 'tfstate-${AWS::Region}/'
+      Tags:
+        - Key: Name
+          Value: !Sub 'tfstate-${OrganizationPrefix}-${AWS::Region}'
+        - Key: Purpose
+          Value: terraform-state
+        - Key: OU
+          Value: WKL
+        - Key: Region
+          Value: !Ref AWS::Region
+        - Key: ManagedBy
+          Value: cloudformation-stackset
+        - Key: Foundation
+          Value: 'true'
+
+  TerraformStateBucketPolicy:
+    Type: AWS::S3::BucketPolicy
+    Properties:
+      Bucket: !Ref TerraformStateBucket
+      PolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Sid: RequireHTTPS
+            Effect: Deny
+            Principal: '*'
+            Action: 's3:*'
+            Resource:
+              - !GetAtt TerraformStateBucket.Arn
+              - !Sub '${TerraformStateBucket.Arn}/*'
+            Condition:
+              Bool:
+                'aws:SecureTransport': 'false'
+          # Allow local deploy role
+          - Sid: AllowLocalDeployRole
+            Effect: Allow
+            Principal:
+              AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:role/cicd-deploy-role'
+            Action:
+              - s3:GetObject
+              - s3:PutObject
+              - s3:DeleteObject
+              - s3:ListBucket
+              - s3:GetBucketVersioning
+              - s3:GetBucketLocation
+            Resource:
+              - !GetAtt TerraformStateBucket.Arn
+              - !Sub '${TerraformStateBucket.Arn}/*'
+
+  # ==========================================================================
+  # Access Logs Bucket
+  # ==========================================================================
+  AccessLogsBucket:
+    Type: AWS::S3::Bucket
+    DeletionPolicy: Retain
+    UpdateReplacePolicy: Retain
+    Properties:
+      BucketName: !Sub '${OrganizationPrefix}-tfstate-logs-${AWS::AccountId}-${AWS::Region}'
+      BucketEncryption:
+        ServerSideEncryptionConfiguration:
+          - ServerSideEncryptionByDefault:
+              SSEAlgorithm: AES256
+      VersioningConfiguration:
+        Status: Enabled
+      PublicAccessBlockConfiguration:
+        BlockPublicAcls: true
+        BlockPublicPolicy: true
+        IgnorePublicAcls: true
+        RestrictPublicBuckets: true
+      OwnershipControls:
+        Rules:
+          - ObjectOwnership: BucketOwnerEnforced
+      LifecycleConfiguration:
+        Rules:
+          - Id: ExpireOldLogs
+            Status: Enabled
+            ExpirationInDays: 90
+            # Versioning is enabled, so expiration only adds a delete marker.
+            # Remove the resulting noncurrent versions too, otherwise expired
+            # logs are kept (and billed) indefinitely.
+            NoncurrentVersionExpiration:
+              NoncurrentDays: 7
+          - Id: TransitionToGlacier
+            Status: Enabled
+            Transitions:
+              - StorageClass: GLACIER
+                TransitionInDays: 30
+      Tags:
+        - Key: Name
+          Value: !Sub 'tfstate-logs-${OrganizationPrefix}-${AWS::Region}'
+        - Key: Purpose
+          Value: access-logs
+        - Key: OU
+          Value: WKL
+        - Key: ManagedBy
+          Value: cloudformation-stackset
+        - Key: Foundation
+          Value: 'true'
+
+  AccessLogsBucketPolicy:
+    Type: AWS::S3::BucketPolicy
+    Properties:
+      Bucket: !Ref AccessLogsBucket
+      PolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Sid: S3ServerAccessLogsPolicy
+            Effect: Allow
+            Principal:
+              Service: logging.s3.amazonaws.com
+            Action: s3:PutObject
+            Resource: !Sub '${AccessLogsBucket.Arn}/*'
+            Condition:
+              ArnLike:
+                'aws:SourceArn': !GetAtt TerraformStateBucket.Arn
+              StringEquals:
+                'aws:SourceAccount': !Ref AWS::AccountId
+
+  # ==========================================================================
+  # DynamoDB Table for State Locking
+  # ==========================================================================
+  TerraformLockTable:
+    Type: AWS::DynamoDB::Table
+    DeletionPolicy: Retain
+    UpdateReplacePolicy: Retain
+    Properties:
+      TableName: !Sub '${OrganizationPrefix}-tfstate-lock'
+      BillingMode: PAY_PER_REQUEST
+      AttributeDefinitions:
+        - AttributeName: LockID
+          AttributeType: S
+      KeySchema:
+        - AttributeName: LockID
+          KeyType: HASH
+      # NOTE(review): Terraform's S3 backend lock items are not known to carry an
+      # ExpirationTime attribute, so this TTL presumably never fires — confirm.
+      TimeToLiveSpecification:
+        AttributeName: ExpirationTime
+        Enabled: true
+      PointInTimeRecoverySpecification:
+        PointInTimeRecoveryEnabled: true
+      SSESpecification:
+        SSEEnabled: true
+        SSEType: KMS
+        KMSMasterKeyId: !If
+          - UseKMS
+          - !Ref StateEncryptionKey
+          - alias/aws/dynamodb
+      Tags:
+        - Key: Name
+          Value: !Sub 'terraform-lock-${OrganizationPrefix}'
+        - Key: Purpose
+          Value: terraform-state-locking
+        - Key: OU
+          Value: WKL
+        - Key: TTLHours
+          Value: !Ref LockTableTTLHours
+        - Key: ManagedBy
+          Value: cloudformation-stackset
+        - Key: Foundation
+          Value: 'true'
+
+Outputs:
+  StateBucketName:
+    Description: Name of the Terraform state bucket
+    Value: !Ref TerraformStateBucket
+    Export:
+      Name: !Sub '${AWS::StackName}-StateBucketName'
+
+  StateBucketArn:
+    Description: ARN of the Terraform state bucket
+    Value: !GetAtt TerraformStateBucket.Arn
+    Export:
+      Name: !Sub '${AWS::StackName}-StateBucketArn'
+
+  LockTableName:
+    Description: Name of the DynamoDB lock table
+    Value: !Ref TerraformLockTable
+    Export:
+      Name: !Sub '${AWS::StackName}-LockTableName'
+
+  LockTableArn:
+    Description: ARN of the DynamoDB lock table
+    Value: !GetAtt TerraformLockTable.Arn
+    Export:
+      Name: !Sub '${AWS::StackName}-LockTableArn'
+
+  KMSKeyArn:
+    Description: ARN of the KMS encryption key (if enabled)
+    Condition: UseKMS
+    Value: !GetAtt StateEncryptionKey.Arn
+    Export:
+      Name: !Sub '${AWS::StackName}-KMSKeyArn'
+
+  Region:
+    Description: AWS Region where resources are deployed
+    Value: !Ref AWS::Region
+    Export:
+      Name: !Sub '${AWS::StackName}-Region'
+
+  BackendConfig:
+    Description: Terraform backend configuration snippet
+    # PROJECT_NAME is a placeholder: Terraform rejects keys that start with "/".
+    Value: !Sub |
+      terraform {
+        backend "s3" {
+          bucket         = "${TerraformStateBucket}"
+          key            = "PROJECT_NAME/terraform.tfstate" # replace PROJECT_NAME per root module
+          region         = "${AWS::Region}"
+          dynamodb_table = "${TerraformLockTable}"
+          encrypt        = true
+        }
+      }
diff --git a/docs/authentication.md b/docs/authentication.md
index d05a5b5..b1f408a 100644
--- a/docs/authentication.md
+++ b/docs/authentication.md
@@ -1,60 +1,67 @@
 # Authentication
 
-Role chaining pattern for CI/CD pipelines on AWS.
+Cross-account role chaining pattern for CI/CD pipelines on AWS.
 
-## AWS Account Model
+## OU Model
 
-| Account  | Environments  | Roles                           |
-| -------- | ------------- | ------------------------------- |
-| Non-Prod | dev, qat      | cicd-oidc-role, cicd-admin-role |
-| Prod     | stg, prod, dr | cicd-oidc-role, cicd-admin-role |
+| OU      | Account(s)    | Roles            |
+| ------- | ------------- | ---------------- |
+| PLT     | PLT-Runner    | cicd-runner-role |
+| WKL-NPD | SBX, DEV, QAT | cicd-deploy-role |
+| WKL-PRD | STG, PRD, DR  | cicd-deploy-role |
 
-Each account has its own set of roles. Pipeline assumes role in target account.
+Runner in PLT assumes deploy roles in WKL accounts.
 
 ## Pattern
 
 ```mermaid
 flowchart LR
-    A[CI/CD Runner] --> B[Auth Role<br/>minimal perms]
-    B --> C[Admin Role<br/>target account]
-    C --> D[Deploy]
+    A[CI/CD Platform] --> B[OIDC Provider]
+    B --> C[cicd-runner-role<br/>PLT Account]
+    C --> D[cicd-deploy-role<br/>WKL Account]
+    D --> E[Deploy]
 ```
 
-## Why Role Chaining?
+## Why Cross-Account Role Chaining?
 
-| Benefit       | Description                                                |
-| ------------- | ---------------------------------------------------------- |
-| Separation    | Auth role handles identity, admin role handles permissions |
-| Auditability  | Single admin role per account to review                    |
-| Flexibility   | Multiple auth methods share same admin role                |
-| Cross-account | Same pattern works for both accounts                       |
+| Benefit             | Description                                         |
+| ------------------- | --------------------------------------------------- |
+| **Isolation**       | Runner has no infra permissions, only assume role   |
+| **Blast Radius**    | Compromise of one WKL account doesn't affect others |
+| **Auto-Onboarding** | New WKL accounts automatically get deploy roles     |
+| **Auditability**    | Single deploy role per WKL account to review        |
+| **Zero Secrets**    | OIDC tokens, no stored credentials                  |
 
-## Roles (per account)
+## Roles
 
-| Role               | Purpose                   | Permissions             |
-| ------------------ | ------------------------- | ----------------------- |
-| `cicd-oidc-role`   | OIDC auth (GitHub/GitLab) | `sts:AssumeRole` only   |
-| `cicd-runner-role` | EC2 instance profile      | `sts:AssumeRole` only   |
-| `cicd-admin-role`  | Infrastructure operations | Full deploy permissions |
+| Role               | OU  | Purpose                   | Permissions             |
+| ------------------ | --- | ------------------------- | ----------------------- |
+| `cicd-runner-role` | PLT | OIDC auth + cross-account | `sts:AssumeRole` to WKL |
+| `cicd-deploy-role` | WKL | Infrastructure operations | Full deploy permissions |
 
 ## Auth Methods
 
 | Method             | Use Case                       | Secrets Stored |
 | ------------------ | ------------------------------ | -------------- |
 | OIDC + Role Chain  | GitHub Actions, GitLab Premium | None           |
-| IMDv2 + Role Chain | Self-hosted EC2 runners        | None           |
+| IMDv2 + Role Chain | Self-hosted EC2 runners in PLT | None           |
 
 ## Platform Config
 
 ### GitHub Actions
 
 ```yaml
-# Role ARNs are not sensitive - hardcode in workflow or use
variables +# Role ARNs in PLT account env: - OIDC_ROLE_ARN_NONPROD: arn:aws:iam::111111111111:role/cicd-oidc-role - OIDC_ROLE_ARN_PROD: arn:aws:iam::222222222222:role/cicd-oidc-role - ADMIN_ROLE_ARN_NONPROD: arn:aws:iam::111111111111:role/cicd-admin-role - ADMIN_ROLE_ARN_PROD: arn:aws:iam::222222222222:role/cicd-admin-role + RUNNER_ROLE_ARN: arn:aws:iam::111111111111:role/cicd-runner-role + + # WKL account deploy roles + DEPLOY_ROLE_SBX: arn:aws:iam::222222222222:role/cicd-deploy-role + DEPLOY_ROLE_DEV: arn:aws:iam::333333333333:role/cicd-deploy-role + DEPLOY_ROLE_QAT: arn:aws:iam::444444444444:role/cicd-deploy-role + DEPLOY_ROLE_STG: arn:aws:iam::555555555555:role/cicd-deploy-role + DEPLOY_ROLE_PRD: arn:aws:iam::666666666666:role/cicd-deploy-role + DEPLOY_ROLE_DR: arn:aws:iam::777777777777:role/cicd-deploy-role permissions: id-token: write @@ -64,25 +71,55 @@ permissions: ### GitLab CI ```yaml -# Variables (per environment) - not secrets, role ARNs are public +# Variables (per environment) variables: - OIDC_ROLE_ARN: arn:aws:iam::${AWS_ACCOUNT_ID}:role/cicd-oidc-role - ADMIN_ROLE_ARN: arn:aws:iam::${AWS_ACCOUNT_ID}:role/cicd-admin-role + RUNNER_ROLE_ARN: arn:aws:iam::${PLT_ACCOUNT_ID}:role/cicd-runner-role + DEPLOY_ROLE_ARN: arn:aws:iam::${WKL_ACCOUNT_ID}:role/cicd-deploy-role ``` ## Cross-Account Flow ``` -Deploy to Non-Prod (dev, qat): - Runner → Non-Prod OIDC Role → Non-Prod Admin Role → Deploy +Deploy to WKL-NPD (sbx, dev, qat): + GitHub → OIDC → PLT Runner Role → WKL-NPD Deploy Role → Deploy + +Deploy to WKL-PRD (stg, prod, dr): + GitHub → OIDC → PLT Runner Role → WKL-PRD Deploy Role → Deploy +``` + +## Trust Policies + +**Runner Role (PLT OU):** + +```yaml +AssumeRolePolicyDocument: + Statement: + - Effect: Allow + Principal: + Federated: !Sub arn:aws:iam::${AWS::AccountId}:oidc-provider/token.actions.githubusercontent.com + Action: sts:AssumeRoleWithWebIdentity + Condition: + StringEquals: + token.actions.githubusercontent.com:aud: sts.amazonaws.com + 
StringLike: + token.actions.githubusercontent.com:sub: repo:org/*:* +``` -Deploy to Prod (stg, prod, dr): - Runner → Prod OIDC Role → Prod Admin Role → Deploy +**Deploy Role (WKL OU):** + +```yaml +AssumeRolePolicyDocument: + Statement: + - Effect: Allow + Principal: + AWS: arn:aws:iam::PLT_ACCOUNT_ID:role/cicd-runner-role + Action: sts:AssumeRole ``` ## Security - Use OIDC (zero stored secrets) -- Separate roles per account +- Runner role in PLT, deploy roles in WKL - Restrict OIDC trust to specific repos - Short session durations (1-2 hours) +- Deploy roles trust only the runner role ARN diff --git a/docs/caching.md b/docs/caching.md new file mode 100644 index 0000000..3de7584 --- /dev/null +++ b/docs/caching.md @@ -0,0 +1,477 @@ +# CI/CD Caching Strategy + +Caching dependencies dramatically reduces pipeline execution time. + +## The Problem + +| Task | Without Cache | With Cache | Savings | +| ---------------------------- | ------------- | ---------- | ------- | +| Download Terraform providers | 30-60s | 2-5s | ~90% | +| Install Ansible collections | 20-40s | 1-3s | ~92% | +| pip install | 45-90s | 3-8s | ~91% | +| npm install | 60-120s | 5-15s | ~88% | +| go mod download | 30-60s | 2-5s | ~92% | +| docker pull base images | 60-120s | 5-10s | ~92% | + +**A 5-minute pipeline becomes 1-2 minutes with proper caching.** + +## Cache Locations + +### Artifact Bucket (PLT Account) + +``` +s3://${ORG}-cicd-artifacts-${PLT_ACCOUNT_ID}-${REGION}/ +├── terraform-providers/{hash}/ # 7 day TTL +├── terraform-plugins/{os}-{arch}/ # 7 day TTL +├── ansible-collections/{hash}/ # 3 day TTL +├── pip-cache/{hash}/ # 3 day TTL +├── node-modules/{hash}/ # 3 day TTL +├── go-mod/{hash}/ # 7 day TTL +├── docker-layers/{hash}/ # 1 day TTL +└── terraform-plans/{run-id}/ # 1 day TTL +``` + +### Platform-Native Cache + +| Platform | Cache Type | Limit | Scope | +| -------------- | ------------- | ------- | ---------- | +| GitHub Actions | actions/cache | 10 GB | Repository | +| GitLab CI | 
CI/CD cache   | Per-job | Runner     |
+| Jenkins        | Workspace     | Local   | Node       |
+
+## Cache Keys
+
+Use content-based hashing for cache keys:
+
+```bash
+# Terraform providers
+terraform-providers-$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)
+
+# Ansible collections
+ansible-collections-$(sha256sum requirements.yml | cut -d' ' -f1)
+
+# Python pip
+pip-cache-$(sha256sum requirements.txt | cut -d' ' -f1)
+
+# Node.js
+node-modules-$(sha256sum package-lock.json | cut -d' ' -f1)
+
+# Go
+go-mod-$(sha256sum go.sum | cut -d' ' -f1)
+```
+
+## Expiration Policy (CRITICAL)
+
+**Caches MUST expire to receive security updates.**
+
+| Cache Type          | TTL    | Reason                              |
+| ------------------- | ------ | ----------------------------------- |
+| Terraform providers | 7 days | Large, versioned, less frequent CVE |
+| Go modules          | 7 days | Versioned, less frequent updates    |
+| Ansible collections | 3 days | May have security fixes             |
+| Python packages     | 3 days | Frequent security patches           |
+| Node modules        | 3 days | Frequent security patches           |
+| Container layers    | 1 day  | Base images get CVE fixes daily     |
+| Plan artifacts      | 1 day  | Job-specific, no reuse value        |
+
+### Why Not Longer TTLs?
+
+| Risk              | Impact                         |
+| ----------------- | ------------------------------ |
+| CVE in cached dep | Vulnerable until cache expires |
+| Stale versions    | Missing bug fixes              |
+| Cache poisoning   | Longer exposure window         |
+| Storage costs     | Unbounded growth               |
+
+## Terraform Caching
+
+### Plugin Cache Directory
+
+```bash
+# Set environment variable
+export TF_PLUGIN_CACHE_DIR="$HOME/.terraform.d/plugin-cache"
+
+# Or use CLI config
+cat > ~/.terraformrc <<EOF
+plugin_cache_dir = "$HOME/.terraform.d/plugin-cache"
+EOF
+```
+
+### S3 Backend Cache
+
+```bash
+#!/bin/bash
+# scripts/terraform-cache.sh
+
+set -euo pipefail
+
+CACHE_BUCKET="${ARTIFACT_BUCKET:-}"
+LOCK_FILE=".terraform.lock.hcl"
+
+if [[ -z "$CACHE_BUCKET" ]]; then
+  echo "[WARN] ARTIFACT_BUCKET not set, skipping cache"
+  exit 0
+fi
+
+CACHE_KEY="terraform-providers-$(sha256sum "$LOCK_FILE" | cut -d' ' -f1)"
+
+download_cache() {
+  echo "[INFO] Downloading Terraform cache..."
+  aws s3 cp "s3://${CACHE_BUCKET}/${CACHE_KEY}.tar.gz" /tmp/tf-cache.tar.gz 2>/dev/null || {
+    echo "[INFO] No cache found, will create after init"
+    return 1
+  }
+  mkdir -p ~/.terraform.d/plugin-cache
+  tar -xzf /tmp/tf-cache.tar.gz -C ~/.terraform.d/
+  echo "[OK] Cache restored"
+}
+
+upload_cache() {
+  if [[ !
-d ~/.terraform.d/plugin-cache ]]; then + echo "[WARN] No plugin cache to upload" + return 0 + fi + echo "[INFO] Uploading Terraform cache..." + tar -czf /tmp/tf-cache.tar.gz -C ~/.terraform.d/ plugin-cache/ + aws s3 cp /tmp/tf-cache.tar.gz "s3://${CACHE_BUCKET}/${CACHE_KEY}.tar.gz" + echo "[OK] Cache uploaded" +} + +case "${1:-download}" in + download) download_cache ;; + upload) upload_cache ;; + *) echo "Usage: $0 {download|upload}" >&2; exit 1 ;; +esac +``` + +### Provider Mirror (Air-Gapped) + +```hcl +# For air-gapped or restricted environments +provider_installation { + filesystem_mirror { + path = "/opt/terraform/providers" + include = ["registry.terraform.io/*/*"] + } + direct { + exclude = ["registry.terraform.io/*/*"] + } +} +``` + +## Ansible Caching + +### Collection Cache + +```bash +# Install to cacheable path +export ANSIBLE_COLLECTIONS_PATH=~/.ansible/collections +ansible-galaxy collection install -r requirements.yml -p "$ANSIBLE_COLLECTIONS_PATH" +``` + +### S3 Backend Cache + +```bash +#!/bin/bash +# scripts/ansible-cache.sh + +set -euo pipefail + +CACHE_BUCKET="${ARTIFACT_BUCKET:-}" +REQ_FILE="requirements.yml" + +if [[ -z "$CACHE_BUCKET" ]]; then + echo "[WARN] ARTIFACT_BUCKET not set, skipping cache" + exit 0 +fi + +CACHE_KEY="ansible-collections-$(sha256sum "$REQ_FILE" | cut -d' ' -f1)" + +download_cache() { + aws s3 cp "s3://${CACHE_BUCKET}/${CACHE_KEY}.tar.gz" /tmp/ansible-cache.tar.gz 2>/dev/null || return 1 + mkdir -p ~/.ansible/collections + tar -xzf /tmp/ansible-cache.tar.gz -C ~/.ansible/ + echo "[OK] Ansible cache restored" +} + +upload_cache() { + if [[ ! 
-d ~/.ansible/collections ]]; then + return 0 + fi + tar -czf /tmp/ansible-cache.tar.gz -C ~/.ansible/ collections/ + aws s3 cp /tmp/ansible-cache.tar.gz "s3://${CACHE_BUCKET}/${CACHE_KEY}.tar.gz" + echo "[OK] Ansible cache uploaded" +} + +case "${1:-download}" in + download) download_cache ;; + upload) upload_cache ;; + *) echo "Usage: $0 {download|upload}" >&2; exit 1 ;; +esac +``` + +## GitHub Actions + +### Terraform Cache + +```yaml +jobs: + terraform: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Cache Terraform providers + uses: actions/cache@v4 + with: + path: ~/.terraform.d/plugin-cache + key: terraform-${{ runner.os }}-${{ hashFiles('**/.terraform.lock.hcl') }} + restore-keys: | + terraform-${{ runner.os }}- + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + + - name: Terraform Init + run: terraform init + env: + TF_PLUGIN_CACHE_DIR: ~/.terraform.d/plugin-cache +``` + +### Ansible Cache + +```yaml +jobs: + ansible: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Cache Ansible collections + uses: actions/cache@v4 + with: + path: ~/.ansible/collections + key: ansible-${{ hashFiles('**/requirements.yml') }} + restore-keys: | + ansible- + + - name: Install collections + run: ansible-galaxy collection install -r requirements.yml + env: + ANSIBLE_COLLECTIONS_PATH: ~/.ansible/collections +``` + +### S3 Backend Cache + +```yaml +jobs: + terraform: + runs-on: ubuntu-latest + env: + ARTIFACT_BUCKET: ${{ vars.ARTIFACT_BUCKET }} + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ vars.RUNNER_ROLE_ARN }} + aws-region: ca-central-1 + + - name: Download cache from S3 + run: | + CACHE_KEY="terraform-$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + aws s3 cp "s3://${ARTIFACT_BUCKET}/${CACHE_KEY}.tar.gz" /tmp/cache.tar.gz || true + if [[ -f /tmp/cache.tar.gz ]]; then + mkdir -p 
~/.terraform.d/plugin-cache + tar -xzf /tmp/cache.tar.gz -C ~/.terraform.d/ + fi + + - name: Terraform Init + run: terraform init + env: + TF_PLUGIN_CACHE_DIR: ~/.terraform.d/plugin-cache + + - name: Upload cache to S3 + if: always() + run: | + CACHE_KEY="terraform-$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + tar -czf /tmp/cache.tar.gz -C ~/.terraform.d/ plugin-cache/ 2>/dev/null || true + aws s3 cp /tmp/cache.tar.gz "s3://${ARTIFACT_BUCKET}/${CACHE_KEY}.tar.gz" || true +``` + +## GitLab CI + +### Terraform Cache + +```yaml +variables: + TF_PLUGIN_CACHE_DIR: ${CI_PROJECT_DIR}/.terraform-cache + +.terraform-cache: + cache: + key: + files: + - .terraform.lock.hcl + prefix: terraform + paths: + - .terraform-cache/ + policy: pull-push + +terraform-init: + extends: .terraform-cache + script: + - mkdir -p ${TF_PLUGIN_CACHE_DIR} + - terraform init +``` + +### Ansible Cache + +```yaml +.ansible-cache: + cache: + key: + files: + - ansible/requirements.yml + prefix: ansible + paths: + - .ansible-collections/ + policy: pull-push + +ansible-playbook: + extends: .ansible-cache + variables: + ANSIBLE_COLLECTIONS_PATH: ${CI_PROJECT_DIR}/.ansible-collections + script: + - ansible-galaxy collection install -r requirements.yml + - ansible-playbook playbooks/site.yml +``` + +## Jenkins + +### Pipeline Cache + +```groovy +pipeline { + agent any + + environment { + TF_PLUGIN_CACHE_DIR = "${WORKSPACE}/.terraform-cache" + ANSIBLE_COLLECTIONS_PATH = "${WORKSPACE}/.ansible-collections" + } + + stages { + stage('Restore Cache') { + steps { + sh ''' + mkdir -p ${TF_PLUGIN_CACHE_DIR} + mkdir -p ${ANSIBLE_COLLECTIONS_PATH} + ''' + // Use shared library or S3 for persistent cache + } + } + + stage('Terraform Init') { + steps { + sh 'terraform init' + } + } + } + + post { + always { + // Save cache to S3 or shared storage + } + } +} +``` + +## Cache Invalidation + +### Force Refresh + +```yaml +# GitHub Actions - add date to key +key: terraform-${{ runner.os }}-${{ 
hashFiles('**/.terraform.lock.hcl') }}-${{ github.run_id }}
+
+# Or use workflow input
+on:
+  workflow_dispatch:
+    inputs:
+      refresh_cache:
+        description: "Force cache refresh"
+        type: boolean
+        default: false
+
+jobs:
+  build:
+    steps:
+      - uses: actions/cache@v4
+        if: ${{ !inputs.refresh_cache }}
+        # ...
+```
+
+### Scheduled Refresh
+
+```yaml
+# Weekly cache refresh for security
+on:
+  schedule:
+    - cron: "0 0 * * 0" # Every Sunday at midnight
+
+# Deleting caches needs write access to the Actions API
+permissions:
+  actions: write
+
+jobs:
+  refresh-cache:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Clear GitHub cache
+        run: |
+          gh cache delete --all
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # No checkout step, so tell gh which repository to act on
+          GH_REPO: ${{ github.repository }}
+```
+
+## Storage Options
+
+| Provider          | Pros                     | Cons               | Cost       |
+| ----------------- | ------------------------ | ------------------ | ---------- |
+| **AWS S3**        | Native IAM, low latency  | Egress fees        | ~$0.023/GB |
+| **Cloudflare R2** | Zero egress, global edge | Separate auth      | ~$0.015/GB |
+| **GitHub Cache**  | Built-in, no setup       | 10GB limit, 7d TTL | Free       |
+| **GitLab Cache**  | Built-in                 | Runner-scoped      | Free       |
+| **Self-hosted**   | Full control             | Maintenance burden | Varies     |
+
+## Cloudflare R2 Setup
+
+```bash
+# Install wrangler
+npm install -g wrangler
+
+# Create R2 bucket
+wrangler r2 bucket create cicd-artifacts
+
+# Generate API token with R2 permissions
+# Store in CI/CD secrets: CF_ACCOUNT_ID, CF_R2_ACCESS_KEY_ID, CF_R2_SECRET_ACCESS_KEY
+```
+
+```yaml
+# GitHub Actions with R2
+- name: Configure R2
+  run: |
+    aws configure set aws_access_key_id ${{ secrets.CF_R2_ACCESS_KEY_ID }}
+    aws configure set aws_secret_access_key ${{ secrets.CF_R2_SECRET_ACCESS_KEY }}
+    aws configure set region auto
+
+- name: Download cache from R2
+  run: |
+    aws s3 cp s3://cicd-artifacts/terraform-cache.tar.gz /tmp/ \
+      --endpoint-url https://${{ secrets.CF_ACCOUNT_ID }}.r2.cloudflarestorage.com || true
+```
+
+## Best Practices
+
+1. **Use content-based keys** - Hash lockfiles, not branches
+2. **Set appropriate TTLs** - Balance speed vs security
+3.
**Clean up regularly** - Lifecycle rules or scheduled jobs +4. **Monitor cache hit rates** - Track effectiveness +5. **Test cache restoration** - Verify cached content works +6. **Document cache keys** - Make invalidation predictable diff --git a/docs/conventions.md b/docs/conventions.md index b825e0c..af7644a 100644 --- a/docs/conventions.md +++ b/docs/conventions.md @@ -2,32 +2,49 @@ Numbered prefixes for correct sorting across CI/CD systems. -## AWS Account Model +## OU Hierarchy -| Account | Environments | Region | -| -------- | ---------------------- | ---------------------------- | -| Non-Prod | 10-dev, 20-qat | ca-central-1 | -| Prod | 40-stg, 70-prod, 90-dr | ca-central-1, ca-west-1 (DR) | +| OU Code | Full Name | Purpose | +| ----------- | ----------------- | -------------------------------- | +| **PLT** | Platform | CI/CD runners and shared tooling | +| **WKL** | Workloads | Application environments | +| **WKL-NPD** | Workloads-NonProd | Development and testing | +| **WKL-PRD** | Workloads-Prod | Production and DR | + +``` +Root +├── PLT OU +│ └── PLT-Runner Account +│ +└── WKL OU + ├── WKL-NPD OU + │ ├── SBX, DEV, QAT + │ + └── WKL-PRD OU + └── STG, PRD, DR +``` + +## Environments + +| Prefix | Environment | OU | Account | Region | +| ------ | ----------- | ------- | ------- | ------------ | +| 00 | runner | PLT | PLT | ca-central-1 | +| 05 | sbx | WKL-NPD | SBX | ca-central-1 | +| 10 | dev | WKL-NPD | DEV | ca-central-1 | +| 20 | qat | WKL-NPD | QAT | ca-central-1 | +| 40 | stg | WKL-PRD | STG | ca-central-1 | +| 70 | prod | WKL-PRD | PRD | ca-central-1 | +| 90 | dr | WKL-PRD | DR | ca-west-1 | ## Why Numbered Prefixes? 
Alphabetical sorting breaks logical order: ``` -dev, prod, qat, stg → Wrong (prod before qat) -10-dev, 20-qat, 40-stg, 70-prod → Correct +dev, prod, qat, sbx, stg → Wrong (prod before qat) +05-sbx, 10-dev, 20-qat, 40-stg, 70-prod, 90-dr → Correct ``` -## Environments - -| Prefix | Environment | Account | Purpose | -| ------ | ----------- | -------- | ----------------------------- | -| 10 | dev | Non-Prod | Development | -| 20 | qat | Non-Prod | Testing | -| 40 | stg | Prod | Staging | -| 70 | prod | Prod | Production | -| 90 | dr | Prod | Disaster recovery (ca-west-1) | - **Gaps allow future growth** (e.g., 75-prod-canary, 95-dr-warm) ## Files/Modules @@ -48,13 +65,16 @@ dev, prod, qat, stg → Wrong (prod before qat) ## Quick Reference ``` -ACCOUNTS ENVIRONMENTS FILES -──────── ──────────── ───── -Non-Prod 10 dev 00-09 Foundation - 20 qat 10-19 Identity - 20-29 Network -Prod 40 stg 30-39 Compute - 70 prod ... +OUs ENVIRONMENTS FILES +─── ──────────── ───── +PLT 00 runner 00-09 Foundation + 10-19 Identity +WKL-NPD 05 sbx 20-29 Network + 10 dev 30-39 Compute + 20 qat ... + +WKL-PRD 40 stg + 70 prod 90 dr ``` @@ -63,3 +83,4 @@ Prod 40 stg 30-39 Compute - Always 2+ digits (01, not 1) - Leave gaps for growth - Consistent across all tools +- Use OU codes (PLT, WKL, WKL-NPD, WKL-PRD) in documentation diff --git a/docs/pipeline-rules.md b/docs/pipeline-rules.md index eae3157..824c878 100644 --- a/docs/pipeline-rules.md +++ b/docs/pipeline-rules.md @@ -1,13 +1,14 @@ # Pipeline Rules -When pipelines run and deployment patterns. +When pipelines run, deployment patterns, and caching strategies. 
-## AWS Account Model +## OU Model -| Account | Environments | Deploy From | -| -------- | ------------- | ---------------------------- | -| Non-Prod | dev, qat | PR or main | -| Prod | stg, prod, dr | main only (prod requires CR) | +| OU | Environments | Deploy From | +| ------- | ------------- | ---------------------------- | +| PLT | runner | N/A (infrastructure only) | +| WKL-NPD | sbx, dev, qat | PR or main | +| WKL-PRD | stg, prod, dr | main only (prod requires CR) | ## Triggers @@ -24,26 +25,27 @@ When pipelines run and deployment patterns. ```mermaid flowchart LR - subgraph NonProd[Non-Prod Account] - A[10-dev] --> B[20-qat] + subgraph NPD[WKL-NPD OU] + A[05-sbx] --> B[10-dev] --> C[20-qat] end - subgraph Prod[Prod Account] - C[40-stg] --> D[70-prod] --> E[90-dr] + subgraph PRD[WKL-PRD OU] + D[40-stg] --> E[70-prod] --> F[90-dr] end - B --> C + C --> D ``` ## Rules by Environment -| Stage | Account | PR/MR | Main | Requires CR | -| -------- | -------- | ------- | ------ | ----------- | -| validate | - | auto | auto | no | -| plan | - | auto | auto | no | -| 10-dev | Non-Prod | manual | manual | no | -| 20-qat | Non-Prod | blocked | manual | no | -| 40-stg | Prod | blocked | manual | no | -| 70-prod | Prod | blocked | manual | **yes** | -| 90-dr | Prod | blocked | manual | no | +| Stage | OU | PR/MR | Main | Requires CR | +| -------- | ------- | ------- | ------ | ----------- | +| validate | - | auto | auto | no | +| plan | - | auto | auto | no | +| 05-sbx | WKL-NPD | manual | manual | no | +| 10-dev | WKL-NPD | manual | manual | no | +| 20-qat | WKL-NPD | blocked | manual | no | +| 40-stg | WKL-PRD | blocked | manual | no | +| 70-prod | WKL-PRD | blocked | manual | **yes** | +| 90-dr | WKL-PRD | blocked | manual | no | ## Path Filtering @@ -64,13 +66,129 @@ Production (70-prod) requires CR because: - Rollback plan documented - Stakeholder awareness +--- + +## Caching Strategy + +### Why Cache? 
+ +| Task | Without Cache | With Cache | Savings | +| ---------------------------- | ------------- | ---------- | ------- | +| Download Terraform providers | 30-60s | 2-5s | ~90% | +| Install Ansible collections | 20-40s | 1-3s | ~92% | +| pip/npm install | 45-90s | 3-8s | ~91% | + +**A 5-minute pipeline becomes 1-2 minutes with proper caching.** + +### Cache Locations + +| Cache Type | Location | TTL | +| ----------- | ----------------------------- | ------ | +| Terraform | `~/.terraform.d/plugin-cache` | 7 days | +| Ansible | `~/.ansible/collections` | 3 days | +| pip | `~/.cache/pip` | 3 days | +| npm | `~/.npm` or `node_modules/` | 3 days | +| S3 artifact | `s3://${ARTIFACT_BUCKET}/` | varies | + +### Cache Keys (Content-Based) + +```bash +# Use lockfile hash for cache keys +terraform-${{ hashFiles('.terraform.lock.hcl') }} +ansible-${{ hashFiles('requirements.yml') }} +pip-${{ hashFiles('requirements.txt') }} +npm-${{ hashFiles('package-lock.json') }} +``` + +### Expiration Policy (CRITICAL) + +**Caches MUST expire for security updates:** + +| Artifact | TTL | Reason | +| ----------------- | ------ | ---------------------------- | +| Terraform plugins | 7 days | Versioned, less frequent CVE | +| Ansible roles | 3 days | May have security fixes | +| pip/npm packages | 3 days | Frequent security patches | +| Container images | 1 day | Base images get CVE fixes | +| Plan artifacts | 1 day | Job-specific, no reuse | + +### Pipeline Stages with Caching + +```yaml +stages: + - restore-cache # Download from S3/cache + - validate # Format, lint + - plan # Terraform plan + - save-cache # Upload to S3/cache + - deploy # Apply (manual) +``` + +### Cache Invalidation + +Force refresh when needed: + +```yaml +# Weekly scheduled refresh +on: + schedule: + - cron: '0 0 * * 0' # Every Sunday + +# Manual refresh input +on: + workflow_dispatch: + inputs: + refresh_cache: + type: boolean + default: false +``` + +> **Reference:** See [docs/caching.md](caching.md) for complete 
implementation. + +--- + ## Platform Implementation | Concept | GitHub | GitLab | Jenkins | | ------------------- | ---------------------------------- | ----------------------- | --------------- | | Skip feature branch | Only `pull_request` + `push: main` | `workflow: rules:` | Branch filter | -| Manual | Environment protection | `when: manual` | `input` step | +| Manual deploy | Environment protection | `when: manual` | `input` step | | CR check | `inputs.change_request != ''` | `$CHANGE_REQUEST != ""` | Parameter check | | Path filter | `paths:` | `changes:` | `changeset` | +| Cache (native) | `actions/cache@v4` | `cache:` keyword | Workspace | +| Cache (S3) | AWS CLI | AWS CLI | AWS CLI | + +### GitHub Actions Cache + +```yaml +- uses: actions/cache@v4 + with: + path: ~/.terraform.d/plugin-cache + key: terraform-${{ runner.os }}-${{ hashFiles('**/.terraform.lock.hcl') }} + restore-keys: terraform-${{ runner.os }}- +``` + +### GitLab CI Cache + +```yaml +cache: + key: + files: + - .terraform.lock.hcl + paths: + - .terraform/providers/ + policy: pull-push +``` + +### S3 Backend Cache (All Platforms) + +```bash +# Download +CACHE_KEY="terraform-$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" +aws s3 cp "s3://${ARTIFACT_BUCKET}/${CACHE_KEY}.tar.gz" /tmp/cache.tar.gz || true + +# Upload (after init) +tar -czf /tmp/cache.tar.gz -C ~/.terraform.d/ plugin-cache/ +aws s3 cp /tmp/cache.tar.gz "s3://${ARTIFACT_BUCKET}/${CACHE_KEY}.tar.gz" +``` **GitLab note:** Rules cannot be inherited from templates. Define at job level. 
diff --git a/gitlab-ci/README.md b/gitlab-ci/README.md index 43ecef5..105d759 100644 --- a/gitlab-ci/README.md +++ b/gitlab-ci/README.md @@ -62,3 +62,74 @@ workflow: # Block feature branch pushes - when: never ``` + +## Caching Strategy + +Use GitLab's native cache with content-based keys: + +### Terraform Cache + +```yaml +variables: + TF_PLUGIN_CACHE_DIR: ${CI_PROJECT_DIR}/.terraform-cache + +.terraform-cache: + cache: + key: + files: + - .terraform.lock.hcl + prefix: terraform + paths: + - .terraform-cache/ + policy: pull-push + +terraform-init: + extends: .terraform-cache + script: + - mkdir -p ${TF_PLUGIN_CACHE_DIR} + - terraform init +``` + +### Ansible Cache + +```yaml +.ansible-cache: + cache: + key: + files: + - requirements.yml + prefix: ansible + paths: + - .ansible-collections/ + policy: pull-push + +ansible-playbook: + extends: .ansible-cache + variables: + ANSIBLE_COLLECTIONS_PATH: ${CI_PROJECT_DIR}/.ansible-collections + script: + - ansible-galaxy collection install -r requirements.yml + - ansible-playbook playbooks/site.yml +``` + +### S3 Backend Cache (Persistent) + +For caching across runners/projects: + +```yaml +.s3-cache: + before_script: + - CACHE_KEY="terraform-$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + - aws s3 cp "s3://${ARTIFACT_BUCKET}/${CACHE_KEY}.tar.gz" /tmp/cache.tar.gz || true + - | + if [[ -f /tmp/cache.tar.gz ]]; then + mkdir -p ~/.terraform.d/plugin-cache + tar -xzf /tmp/cache.tar.gz -C ~/.terraform.d/ + fi + after_script: + - CACHE_KEY="terraform-$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + - tar -czf /tmp/cache.tar.gz -C ~/.terraform.d/ plugin-cache/ 2>/dev/null || true + - aws s3 cp /tmp/cache.tar.gz "s3://${ARTIFACT_BUCKET}/${CACHE_KEY}.tar.gz" || true +``` + +See [docs/caching.md](../docs/caching.md) for complete details.
diff --git a/jenkins/README.md b/jenkins/README.md index 90e81e5..bd9f020 100644 --- a/jenkins/README.md +++ b/jenkins/README.md @@ -123,3 +123,109 @@ pipeline { } } ``` + +## Caching Strategy + +Jenkins lacks native caching, so use workspace persistence or S3: + +### Workspace Cache + +```groovy +pipeline { + agent any + + environment { + TF_PLUGIN_CACHE_DIR = "${WORKSPACE}/.terraform-cache" + ANSIBLE_COLLECTIONS_PATH = "${WORKSPACE}/.ansible-collections" + } + + stages { + stage('Setup Cache Dirs') { + steps { + sh ''' + mkdir -p ${TF_PLUGIN_CACHE_DIR} + mkdir -p ${ANSIBLE_COLLECTIONS_PATH} + ''' + } + } + + stage('Terraform Init') { + steps { + sh 'terraform init' + } + } + } +} +``` + +### S3 Backend Cache (Recommended) + +For persistent caching across builds and agents: + +```groovy +pipeline { + agent any + + environment { + ARTIFACT_BUCKET = 'your-org-cicd-artifacts-123456789012-ca-central-1' + TF_PLUGIN_CACHE_DIR = "${HOME}/.terraform.d/plugin-cache" + } + + stages { + stage('Restore Cache') { + steps { + sh ''' + CACHE_KEY="terraform-$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + mkdir -p ${TF_PLUGIN_CACHE_DIR} + aws s3 cp "s3://${ARTIFACT_BUCKET}/${CACHE_KEY}.tar.gz" /tmp/cache.tar.gz || true + if [ -f /tmp/cache.tar.gz ]; then + tar -xzf /tmp/cache.tar.gz -C ~/.terraform.d/ + fi + ''' + } + } + + stage('Terraform Init') { + steps { + sh 'terraform init' + } + } + + stage('Save Cache') { + steps { + sh ''' + CACHE_KEY="terraform-$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + tar -czf /tmp/cache.tar.gz -C ~/.terraform.d/ plugin-cache/ 2>/dev/null || true + aws s3 cp /tmp/cache.tar.gz "s3://${ARTIFACT_BUCKET}/${CACHE_KEY}.tar.gz" || true + ''' + } + } + } +} +``` + +### Shared Library for Cache + +```groovy +// vars/terraformCache.groovy +def download(String artifactBucket) { + sh """ + CACHE_KEY="terraform-\$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + aws s3 cp "s3://${artifactBucket}/\${CACHE_KEY}.tar.gz" /tmp/cache.tar.gz || true + if
[ -f /tmp/cache.tar.gz ]; then + mkdir -p ~/.terraform.d/plugin-cache + tar -xzf /tmp/cache.tar.gz -C ~/.terraform.d/ + fi + """ +} + +def upload(String artifactBucket) { + sh """ + CACHE_KEY="terraform-\$(sha256sum .terraform.lock.hcl | cut -d' ' -f1)" + tar -czf /tmp/cache.tar.gz -C ~/.terraform.d/ plugin-cache/ 2>/dev/null || true + aws s3 cp /tmp/cache.tar.gz "s3://${artifactBucket}/\${CACHE_KEY}.tar.gz" || true + """ +} +``` + +See [docs/caching.md](../docs/caching.md) for complete details.