diff --git a/.cicd/app.yaml b/.cicd/app.yaml
new file mode 100644
index 0000000000..340cd69ce5
--- /dev/null
+++ b/.cicd/app.yaml
@@ -0,0 +1,2 @@
+app_name: full-stack-fastapi-template
+deployment_target: ecs-fargate
diff --git a/.cicd/cdk/bootstrap-apprunner.ts b/.cicd/cdk/bootstrap-apprunner.ts
new file mode 100644
index 0000000000..b768e809ef
--- /dev/null
+++ b/.cicd/cdk/bootstrap-apprunner.ts
@@ -0,0 +1,86 @@
+import * as cdk from "aws-cdk-lib";
+import * as apprunner from "aws-cdk-lib/aws-apprunner";
+import * as ecr from "aws-cdk-lib/aws-ecr";
+import * as iam from "aws-cdk-lib/aws-iam";
+import { Tags } from "aws-cdk-lib";
+
+/** Inputs for {@link BootstrapAppRunnerStack}. */
+export interface BootstrapAppRunnerProps {
+  appName: string;
+  environment: string;
+  region: string;
+  ecrRepoName: string;
+}
+
+/**
+ * Bootstraps an ECR repository, the App Runner access and instance roles, and
+ * an App Runner service that serves the repository's `latest` tag on port 8080.
+ */
+export class BootstrapAppRunnerStack extends cdk.Stack {
+  constructor(scope: cdk.App, id: string, props: BootstrapAppRunnerProps) {
+    super(scope, id, { env: { region: props.region } });
+
+    Tags.of(this).add("cicd:app", props.appName);
+    Tags.of(this).add("cicd:env", props.environment);
+    Tags.of(this).add("cicd:managed-by", "aws-cicd-skill");
+
+    const repo = new ecr.Repository(this, "EcrRepo", {
+      repositoryName: props.ecrRepoName,
+      imageScanOnPush: true,
+      lifecycleRules: [{ maxImageCount: 20 }],
+    });
+
+    // Role App Runner assumes to pull the image from ECR.
+    const accessRole = new iam.Role(this, "AppRunnerAccessRole", {
+      assumedBy: new iam.ServicePrincipal("build.apprunner.amazonaws.com"),
+    });
+    repo.grantPull(accessRole);
+
+    // Role attached to the running service instances (no policies granted here).
+    const instanceRole = new iam.Role(this, "AppRunnerInstanceRole", {
+      assumedBy: new iam.ServicePrincipal("tasks.apprunner.amazonaws.com"),
+    });
+
+    const service = new apprunner.CfnService(this, "Service", {
+      serviceName: `${props.appName}-${props.environment}`,
+      sourceConfiguration: {
+        authenticationConfiguration: {
+          accessRoleArn: accessRole.roleArn,
+        },
+        imageRepository: {
+          // NOTE(review): the repository is created by this same stack, so the
+          // `latest` tag may not exist yet on first deploy — confirm the rollout order.
+          imageIdentifier: `${repo.repositoryUri}:latest`,
+          imageRepositoryType: "ECR",
+          imageConfiguration: { port: "8080" },
+        },
+        autoDeploymentsEnabled: false,
+      },
+      instanceConfiguration: {
+        cpu: "1024",
+        memory: "2048",
+        instanceRoleArn: instanceRole.roleArn,
+      },
+    });
+
+    new cdk.CfnOutput(this, "EcrRepositoryUri", { value: repo.repositoryUri });
+    new cdk.CfnOutput(this, "AppRunnerServiceArn", { value: service.attrServiceArn });
+    new cdk.CfnOutput(this, "AppRunnerServiceUrl", { value: service.attrServiceUrl });
+    new cdk.CfnOutput(this, "SsmHandlesHint", {
+      value: `/cicd/${props.appName}/${props.environment}/handles`,
+      description: "Write handles JSON here after bootstrap",
+    });
+  }
+}
+
+const app = new cdk.App();
+const appName = app.node.tryGetContext("appName") ?? "my-app";
+const environment = app.node.tryGetContext("environment") ?? "dev";
+const region = app.node.tryGetContext("region") ?? "us-east-1";
+
+new BootstrapAppRunnerStack(app, "BootstrapAppRunner", {
+  appName,
+  environment,
+  region,
+  ecrRepoName: `${appName}-${environment}`,
+});
diff --git a/.cicd/cdk/bootstrap-ecs.ts b/.cicd/cdk/bootstrap-ecs.ts
new file mode 100644
index 0000000000..05e7cd8dc5
--- /dev/null
+++ b/.cicd/cdk/bootstrap-ecs.ts
@@ -0,0 +1,110 @@
+import * as cdk from "aws-cdk-lib";
+import * as ec2 from "aws-cdk-lib/aws-ec2";
+import * as ecs from "aws-cdk-lib/aws-ecs";
+import * as elbv2 from "aws-cdk-lib/aws-elasticloadbalancingv2";
+import * as ecr from "aws-cdk-lib/aws-ecr";
+import * as logs from "aws-cdk-lib/aws-logs";
+import { Tags } from "aws-cdk-lib";
+
+/** Inputs for {@link BootstrapEcsStack}. */
+export interface BootstrapEcsProps {
+  appName: string;
+  environment: string;
+  region: string;
+  /** Existing VPC to look up; a new VPC is created when omitted. */
+  vpcId?: string;
+  /** Container/target port. Falls back to the `containerPort` context value, then 8080. */
+  containerPort?: number;
+  /** ALB health-check path. Falls back to the `healthCheckPath` context value, then `/health`. */
+  healthCheckPath?: string;
+}
+
+/**
+ * Bootstraps the ECS Fargate baseline: a VPC (looked up or created), an ECR
+ * repository, a cluster, a single-task Fargate service, and an internet-facing
+ * ALB whose port-80 listener forwards to the container port.
+ */
+export class BootstrapEcsStack extends cdk.Stack {
+  constructor(scope: cdk.App, id: string, props: BootstrapEcsProps) {
+    // `Vpc.fromLookup` needs a concrete account as well as a region, so pass the
+    // CLI-resolved account through (it stays undefined when the CLI has none).
+    super(scope, id, {
+      env: { account: process.env.CDK_DEFAULT_ACCOUNT, region: props.region },
+    });
+
+    Tags.of(this).add("cicd:app", props.appName);
+    Tags.of(this).add("cicd:env", props.environment);
+    Tags.of(this).add("cicd:managed-by", "aws-cicd-skill");
+
+    // Context values supplied with `-c` arrive as strings, hence the Number() coercion.
+    const containerPort = Number(
+      props.containerPort ?? this.node.tryGetContext("containerPort") ?? 8080,
+    );
+    if (!Number.isInteger(containerPort) || containerPort < 1 || containerPort > 65535) {
+      throw new Error(`containerPort must be an integer between 1 and 65535, got ${containerPort}`);
+    }
+    const healthCheckPath: string =
+      props.healthCheckPath ?? this.node.tryGetContext("healthCheckPath") ?? "/health";
+
+    const vpc = props.vpcId
+      ? ec2.Vpc.fromLookup(this, "Vpc", { vpcId: props.vpcId })
+      : new ec2.Vpc(this, "Vpc", { maxAzs: 2, natGateways: 1 });
+
+    const repo = new ecr.Repository(this, "EcrRepo", {
+      repositoryName: `${props.appName}-${props.environment}`,
+      imageScanOnPush: true,
+      // Same image cap as the App Runner bootstrap repository so old images are pruned.
+      lifecycleRules: [{ maxImageCount: 20 }],
+    });
+
+    const cluster = new ecs.Cluster(this, "Cluster", {
+      vpc,
+      clusterName: `${props.appName}-${props.environment}`,
+      containerInsights: true,
+    });
+
+    const taskDef = new ecs.FargateTaskDefinition(this, "TaskDef", {
+      cpu: 512,
+      memoryLimitMiB: 1024,
+    });
+    taskDef.addContainer("App", {
+      image: ecs.ContainerImage.fromEcrRepository(repo, "latest"),
+      logging: ecs.LogDrivers.awsLogs({
+        streamPrefix: props.appName,
+        logRetention: logs.RetentionDays.ONE_MONTH,
+      }),
+      portMappings: [{ containerPort }],
+    });
+
+    const service = new ecs.FargateService(this, "Service", {
+      cluster,
+      taskDefinition: taskDef,
+      desiredCount: 1,
+      assignPublicIp: true,
+    });
+
+    const alb = new elbv2.ApplicationLoadBalancer(this, "Alb", { vpc, internetFacing: true });
+    const listener = alb.addListener("Http", { port: 80, open: true });
+    const tg = listener.addTargets("EcsTargets", {
+      port: containerPort,
+      targets: [service],
+      healthCheck: { path: healthCheckPath },
+    });
+
+    new cdk.CfnOutput(this, "ClusterName", { value: cluster.clusterName });
+    new cdk.CfnOutput(this, "TargetGroupArn", { value: tg.targetGroupArn });
+    new cdk.CfnOutput(this, "EcrRepositoryUri", { value: repo.repositoryUri });
+  }
+}
+
+const app = new cdk.App();
+const appName = app.node.tryGetContext("appName") ?? "my-app";
+const environment = app.node.tryGetContext("environment") ?? "dev";
+const region = app.node.tryGetContext("region") ?? "us-east-1";
+
+new BootstrapEcsStack(app, "BootstrapEcs", {
+  appName,
+  environment,
+  region,
+  vpcId: app.node.tryGetContext("vpcId"),
+});
diff --git a/.cicd/cdk/import-existing.ts b/.cicd/cdk/import-existing.ts
new file mode 100644
index 0000000000..b5d71fd105
--- /dev/null
+++ b/.cicd/cdk/import-existing.ts
@@ -0,0 +1,74 @@
+import * as cdk from "aws-cdk-lib";
+import * as ec2 from "aws-cdk-lib/aws-ec2";
+import * as ecs from "aws-cdk-lib/aws-ecs";
+import * as elbv2 from "aws-cdk-lib/aws-elasticloadbalancingv2";
+import { Tags } from "aws-cdk-lib";
+
+/** Identifiers of the already-provisioned resources that {@link ImportExistingStack} references. */
+export interface ImportExistingProps {
+  appName: string;
+  environment: string;
+  region: string;
+  clusterName: string;
+  clusterArn: string;
+  vpcArn: string;
+  targetGroupArn: string;
+  albArn: string;
+}
+
+/**
+ * Import existing ECS/ALB resources — never creates new Cluster().
+ *
+ * The VPC id is read from the `vpcId` context key of the construct tree this
+ * stack is attached to, so the class does not depend on a module-level `app`.
+ */
+export class ImportExistingStack extends cdk.Stack {
+  constructor(scope: cdk.App, id: string, props: ImportExistingProps) {
+    super(scope, id, { env: { region: props.region } });
+
+    Tags.of(this).add("cicd:app", props.appName);
+    Tags.of(this).add("cicd:env", props.environment);
+    Tags.of(this).add("cicd:managed-by", "aws-cicd-skill");
+
+    const vpc = ec2.Vpc.fromVpcAttributes(this, "ImportedVpc", {
+      // Resolve context through this construct's own tree rather than the
+      // module-level `app` constant declared below the class.
+      vpcId: this.node.tryGetContext("vpcId") ?? "vpc-placeholder",
+      // Derive AZ names from the target region instead of hard-coding us-east-1.
+      // NOTE(review): assumes the region exposes zones suffixed `a` and `b` — confirm.
+      availabilityZones: [`${props.region}a`, `${props.region}b`],
+    });
+    const cluster = ecs.Cluster.fromClusterAttributes(this, "ImportedCluster", {
+      clusterName: props.clusterName,
+      clusterArn: props.clusterArn,
+      vpc,
+    });
+
+    const tg = elbv2.ApplicationTargetGroup.fromTargetGroupAttributes(this, "ImportedTg", {
+      targetGroupArn: props.targetGroupArn,
+    });
+
+    elbv2.ApplicationLoadBalancer.fromApplicationLoadBalancerAttributes(this, "ImportedAlb", {
+      loadBalancerArn: props.albArn,
+      securityGroupId: "sg-placeholder",
+    });
+
+    new cdk.CfnOutput(this, "ImportedClusterArn", { value: cluster.clusterArn });
+    new cdk.CfnOutput(this, "ImportedTargetGroupArn", { value: tg.targetGroupArn });
+    new cdk.CfnOutput(this, "SsmHandlesPath", {
+      value: `/cicd/${props.appName}/${props.environment}/handles`,
+    });
+  }
+}
+
+const app = new cdk.App();
+new ImportExistingStack(app, "ImportExisting", {
+  appName: app.node.tryGetContext("appName") ?? "my-app",
+  environment: app.node.tryGetContext("environment") ?? "dev",
+  region: app.node.tryGetContext("region") ?? "us-east-1",
+  clusterName: app.node.tryGetContext("clusterName") ?? "cluster",
+  clusterArn: app.node.tryGetContext("clusterArn") ?? "/cluster",
+  vpcArn: app.node.tryGetContext("vpcArn") ?? "/vpc",
+  targetGroupArn: app.node.tryGetContext("targetGroupArn") ?? "/target-group",
+  albArn: app.node.tryGetContext("albArn") ?? 
"/alb", +}); diff --git a/.cicd/cdk/package.json b/.cicd/cdk/package.json new file mode 100644 index 0000000000..79a218c653 --- /dev/null +++ b/.cicd/cdk/package.json @@ -0,0 +1,17 @@ +{ + "name": "cicd-bootstrap-cdk", + "version": "1.0.0", + "private": true, + "scripts": { + "build": "tsc", + "synth": "cdk synth" + }, + "dependencies": { + "aws-cdk-lib": "2.170.0", + "constructs": "10.4.2" + }, + "devDependencies": { + "typescript": "5.6.3", + "@types/node": "20.17.6" + } +} diff --git a/.cicd/cdk/tsconfig.json b/.cicd/cdk/tsconfig.json new file mode 100644 index 0000000000..6e96fa74d6 --- /dev/null +++ b/.cicd/cdk/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "commonjs", + "lib": ["ES2022"], + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "outDir": "dist", + "rootDir": "." + }, + "include": ["*.ts"] +} diff --git a/.cicd/env/dev.yaml b/.cicd/env/dev.yaml new file mode 100644 index 0000000000..e3632d7543 --- /dev/null +++ b/.cicd/env/dev.yaml @@ -0,0 +1,34 @@ +environment: dev + +# Public metadata only — ARNs and passwords live in SSM / Secrets Manager +aws: + region: us-east-1 + # account_id: fill after `aws sts get-caller-identity` + +app: + name: full-stack-fastapi-template + domain: null + +deployment: + target_override: ecs-fargate + mode: single-node + +scaling: + min_instances: 1 + max_instances: 2 + +health: + path: /api/v1/utils/health-check/ + timeout_seconds: 10 + +ecr: + backend_repository: full-stack-fastapi-template-dev-backend + frontend_repository: full-stack-fastapi-template-dev-frontend + +secrets: + database: full-stack-fastapi-template-dev/database + app: full-stack-fastapi-template-dev/app + +logging: + cloudwatch: + enabled: true diff --git a/.cicd/env/dev.yaml.example b/.cicd/env/dev.yaml.example new file mode 100644 index 0000000000..8810fe2086 --- /dev/null +++ b/.cicd/env/dev.yaml.example @@ -0,0 +1,22 @@ +environment: dev + +# Public metadata only — ARNs live in SSM 
(/cicd/<app_name>/dev/handles) +aws: + region: us-east-1 + +app: + name: my-app + domain: dev.example.com + +deployment: + # Optional override; skill auto-detects apprunner vs ecs-fargate + target_override: null + mode: single-node + +scaling: + min_instances: 1 + max_instances: 2 + +health: + path: /health + timeout_seconds: 10 diff --git a/.cicd/env/prod.yaml.example b/.cicd/env/prod.yaml.example new file mode 100644 index 0000000000..2eba6a08a4 --- /dev/null +++ b/.cicd/env/prod.yaml.example @@ -0,0 +1,20 @@ +environment: prod + +aws: + region: us-east-1 + +app: + name: my-app + domain: example.com + +deployment: + target_override: null + mode: distributed + +scaling: + min_instances: 2 + max_instances: 10 + +health: + path: /health + timeout_seconds: 10 diff --git a/.cicd/env/staging.yaml.example b/.cicd/env/staging.yaml.example new file mode 100644 index 0000000000..62148cabb0 --- /dev/null +++ b/.cicd/env/staging.yaml.example @@ -0,0 +1,20 @@ +environment: staging + +aws: + region: us-east-1 + +app: + name: my-app + domain: staging.example.com + +deployment: + target_override: null + mode: single-node + +scaling: + min_instances: 1 + max_instances: 3 + +health: + path: /health + timeout_seconds: 10 diff --git a/.cicd/project.yaml b/.cicd/project.yaml new file mode 100644 index 0000000000..7b7a8d1877 --- /dev/null +++ b/.cicd/project.yaml @@ -0,0 +1,66 @@ +version: 2 +generated_by: aws-cicd-v2 + +repository: + owner: wuben154-maker + name: full-stack-fastapi-template + default_branch: master + url: https://github.com/wuben154-maker/full-stack-fastapi-template + +project: + app_name: full-stack-fastapi-template + layout: frontend-backend-split + package_manager: uv + +deployment: + detected_target: ecs-fargate + target_override: null + mode_default: single-node + min_instances: 1 + max_instances: 2 + +database: + enabled: true + engine: postgresql + migration_tool: alembic + migrate_command: alembic upgrade head + precheck_command: uv run python app/backend_pre_start.py 
+ bootstrap_command: uv run python app/initial_data.py + secret_name: full-stack-fastapi-template-dev/database + notes: | + Provision RDS PostgreSQL (or dev Postgres) and store credentials in AWS Secrets Manager. + ECS task env should map secret fields to POSTGRES_SERVER, POSTGRES_PORT, POSTGRES_USER, + POSTGRES_PASSWORD, and POSTGRES_DB. Run migrations before or during backend deploy. + +services: + backend: + enabled: true + path: backend + dockerfile: backend/Dockerfile + build_context: . + container_port: 8000 + health_path: /api/v1/utils/health-check/ + commands: + install: pip install uv && uv sync --frozen --package app + lint: uv run ruff check app scripts && uv run ruff format app scripts --check + typecheck: uv run mypy app + test: cd .. && docker compose up -d db && cd backend && uv run bash scripts/prestart.sh && uv run bash scripts/tests-start.sh "CI" && cd .. && docker compose down -v --remove-orphans + build: docker build -f Dockerfile -t full-stack-fastapi-template-backend:ci .. + + frontend: + enabled: true + path: frontend + dockerfile: frontend/Dockerfile + build_context: . + container_port: 80 + health_path: / + commands: + install: npm install -g bun && bun install + lint: bun run lint + typecheck: bun run build + test: echo "Playwright covered by .github/workflows/playwright.yml" + build: docker build -f Dockerfile -t full-stack-fastapi-template-frontend:ci .. --build-arg VITE_API_URL=http://localhost:8000 + +handles: + ssm_prefix: /cicd/wuben154-maker-full-stack-fastapi-template + # Full handles JSON stored in SSM per env — never commit ARNs here diff --git a/.cicd/runbooks/rollback.md b/.cicd/runbooks/rollback.md new file mode 100644 index 0000000000..ddb7955ff2 --- /dev/null +++ b/.cicd/runbooks/rollback.md @@ -0,0 +1,63 @@ +# Rollback Runbook + +## When To Roll Back + +- Deployment workflow fails after updating infrastructure. +- Health or smoke verification fails. 
+- Error rate, latency, or business smoke checks exceed the configured threshold. +- A secret or configuration issue is detected after release. + +## ECS + +The deploy workflow automatically attempts this path when verification fails after an ECS service update: + +1. Read the previous task definition from the rollback baseline captured before deploy. +2. Update the ECS service back to the previous task definition. +3. Wait for service stability. +4. Re-run health checks. +5. Save `rollback-evidence.json`. + +Manual command shape: + +```bash +aws ecs update-service \ + --cluster "$ECS_CLUSTER" \ + --service "$ECS_SERVICE" \ + --task-definition "$PREVIOUS_TASK_DEFINITION" + +aws ecs wait services-stable \ + --cluster "$ECS_CLUSTER" \ + --services "$ECS_SERVICE" +``` + +## EC2 SSH + +The deploy workflow automatically attempts this path when verification fails after an EC2 container restart: + +1. Read the previous image from the rollback baseline captured before deploy. +2. SSH to affected hosts. +3. Pull and restart the previous image. +4. Re-run host health checks. +5. Save `rollback-evidence.json`. + +Manual command shape: + +```bash +ssh "$SSH_USER@$HOST" " + docker pull '$PREVIOUS_IMAGE' && + docker rm -f '$CONTAINER_NAME' || true && + docker run -d --restart unless-stopped --name '$CONTAINER_NAME' '$PREVIOUS_IMAGE' +" +``` + +## Evidence Files + +- `deployment-evidence.json`: target environment, release ID, image digests, service revisions, target health, HTTP verification, and workflow URL. +- `rollback-evidence.json`: previous task definitions or image digests and rollback status per service or host. + +## Stop Conditions + +- Do not retry blindly after two failures with the same root cause. +- Escalate if rollback cannot restore healthy service. +- Do not expose production traffic until verification passes. +- Do not assume database rollback is safe after destructive migrations. 
diff --git a/.cicd/scripts/estimate.py b/.cicd/scripts/estimate.py
new file mode 100644
index 0000000000..7738870597
--- /dev/null
+++ b/.cicd/scripts/estimate.py
@@ -0,0 +1,82 @@
+"""Monthly cost estimate (pure math, no AWS API calls)."""
+
+from __future__ import annotations
+
+import argparse
+
+# Approximate public list prices in USD — update in one place.
+# *_HOUR values are hourly rates (per vCPU / per GB of memory), *_MONTHLY values
+# are flat monthly charges, and CW_LOGS_GB is charged per GB of logs.
+APPRUNNER_VCPU_HOUR = 0.064
+APPRUNNER_GB_HOUR = 0.007
+ECS_VCPU_HOUR = 0.04048
+ECS_GB_HOUR = 0.004445
+ALB_MONTHLY = 16.0
+NAT_MONTHLY = 32.0
+CW_LOGS_GB = 0.50
+
+
+def estimate_apprunner(
+    vcpu: float = 1.0, memory_gb: float = 2.0, hours: float = 24 * 30
+) -> dict:
+    """Return the App Runner compute estimate for the given size over ``hours``."""
+    compute = vcpu * APPRUNNER_VCPU_HOUR * hours + memory_gb * APPRUNNER_GB_HOUR * hours
+    return {
+        "target": "apprunner",
+        "monthly_usd": round(compute, 2),
+        "breakdown": {"compute": round(compute, 2)},
+    }
+
+
+def estimate_ecs(
+    vcpu: float = 0.5,
+    memory_gb: float = 1.0,
+    tasks: int = 1,
+    include_alb: bool = True,
+    include_nat: bool = True,
+    log_gb: float = 5.0,
+) -> dict:
+    """Return the 30-day Fargate estimate: compute plus optional ALB/NAT and logs."""
+    hours = 24 * 30
+    compute = tasks * (vcpu * ECS_VCPU_HOUR * hours + memory_gb * ECS_GB_HOUR * hours)
+    alb = ALB_MONTHLY if include_alb else 0
+    nat = NAT_MONTHLY if include_nat else 0
+    logs = log_gb * CW_LOGS_GB
+    total = compute + alb + nat + logs
+    return {
+        "target": "ecs-fargate",
+        "monthly_usd": round(total, 2),
+        "breakdown": {
+            "fargate": round(compute, 2),
+            "alb": alb,
+            "nat": nat,
+            "logs": round(logs, 2),
+        },
+    }
+
+
+def main() -> None:
+    """Parse CLI flags and print the estimate as Markdown."""
+    p = argparse.ArgumentParser()
+    p.add_argument(
+        "--target", choices=["apprunner", "ecs-fargate"], default="apprunner"
+    )
+    # No argparse defaults here: an omitted flag must fall through to the
+    # selected target's own default size instead of forcing 1 vCPU / 2 GB on ECS.
+    p.add_argument("--cpu", type=float, default=None, help="vCPU")
+    p.add_argument("--memory", type=float, default=None, help="GB")
+    args = p.parse_args()
+    sizing = {}
+    if args.cpu is not None:
+        sizing["vcpu"] = args.cpu
+    if args.memory is not None:
+        sizing["memory_gb"] = args.memory
+    estimate = estimate_apprunner if args.target == "apprunner" else estimate_ecs
+    r = estimate(**sizing)
+    print(f"**Estimated monthly cost:** ${r['monthly_usd']}")
+    for k, v in r["breakdown"].items():
+        print(f"- {k}: ${v}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.cicd/secrets/README.md b/.cicd/secrets/README.md
new file mode 100644
index 0000000000..e891581666
--- /dev/null
+++ b/.cicd/secrets/README.md
@@ -0,0 +1,57 @@
+# CI/CD Secret Boundary
+
+Do not commit real secret values to `.cicd/`.
+
+The generated environment files may store secret names, variable names, and ARN references only. Real values belong in GitHub Secrets, GitHub Variables, AWS Secrets Manager, or AWS Systems Manager Parameter Store.
+
+## GitHub Secrets
+
+- `AWS_RELEASE_ROLE_ARN`: IAM role ARN trusted by GitHub OIDC for release and deploy workflows.
+- `AWS_EC2_SSH_PRIVATE_KEY`: private key for EC2 SSH deployment, required only when `deployment.target` is `ec2-ssh`.
+
+## GitHub Variables
+
+Define only the variables needed by enabled services:
+
+- `AWS_REGION`
+- `ECR_REPOSITORY_FRONTEND`
+- `ECR_REPOSITORY_BACKEND`
+- `ECR_REPOSITORY_WORKER`
+
+## AWS Secret Stores
+
+Application runtime values should live in AWS Secrets Manager or SSM Parameter Store:
+
+- database credentials
+- application runtime secrets
+- third-party API keys
+- OAuth credentials
+
+Reference these values by name or ARN in `.cicd/env/<environment>.yaml`.
+ +## CloudWatch Logs 写入权限(EC2 实例角色) + +当 `.cicd/env/<environment>.yaml` 中 `logging.cloudwatch.enabled` 为 `true` 时, +Docker 的 awslogs 日志驱动会直接调用 AWS CloudWatch Logs API 推送日志, +因此 EC2 实例的 IAM 角色需要以下权限: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:PutLogEvents" + ], + "Resource": "arn:aws:logs:<region>:<account-id>:log-group:/ecs/*:*" + } + ] +} +``` + +建议将 Resource 收窄到具体的 log group 前缀,不要使用 `"*"`。 + +此权限由 EC2 实例角色承担,不需要在 GitHub Actions 中配置额外的 Secret。 diff --git a/.github/workflows/cdk-bootstrap.yml b/.github/workflows/cdk-bootstrap.yml new file mode 100644 index 0000000000..a451f98310 --- /dev/null +++ b/.github/workflows/cdk-bootstrap.yml @@ -0,0 +1,56 @@ +name: cdk-bootstrap + +on: + workflow_dispatch: + inputs: + environment: + description: dev | staging | prod + required: true + type: choice + options: [dev, staging, prod] + app_name: + description: Application name (unique per account) + required: true + deployment_target: + description: apprunner | ecs-fargate + required: true + type: choice + options: [apprunner, ecs-fargate] + +permissions: + id-token: write + contents: read + +jobs: + bootstrap: + runs-on: ubuntu-latest + environment: ${{ inputs.environment }} + steps: + - uses: actions/checkout@v4 + + - uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_BOOTSTRAP_ROLE_ARN }} + aws-region: ${{ vars.AWS_REGION || 'us-east-1' }} + + - uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install CDK deps + working-directory: .cicd/cdk + run: npm ci + + - name: CDK synth App Runner stack + if: inputs.deployment_target == 'apprunner' + working-directory: .cicd/cdk + run: | + npx cdk synth BootstrapAppRunner \ + -c appName=${{ inputs.app_name }} \ + -c environment=${{ inputs.environment }} \ + -c region=${{ vars.AWS_REGION || 'us-east-1' }} + + - name: CDK deploy (user runs locally or via approved pipeline) + run: | + echo "Bootstrap stack 
synthesized. Run \`cdk deploy\` with bootstrap role in approved window." + echo "After deploy, write handles to SSM /cicd/${{ inputs.app_name }}/${{ inputs.environment }}/handles" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000..4c23679319 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,111 @@ +name: ci + +on: + pull_request: + push: + branches: [main, master] + workflow_dispatch: + +permissions: + contents: read + security-events: write + +jobs: + detect: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.detect.outputs.matrix }} + has_services: ${{ steps.detect.outputs.has_services }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install YAML parser + run: python -m pip install --disable-pip-version-check pyyaml + - id: detect + shell: bash + run: | + test -f .cicd/project.yaml + python - <<'PY' >> "$GITHUB_OUTPUT" + import json + import yaml + + with open(".cicd/project.yaml", encoding="utf-8") as handle: + data = yaml.safe_load(handle) or {} + + services = [] + for name, svc in (data.get("services") or {}).items(): + if svc and svc.get("enabled") is True: + services.append({"name": name}) + + print(f"matrix={json.dumps({'include': services})}") + print(f"has_services={str(bool(services)).lower()}") + PY + + checks: + needs: detect + if: ${{ needs.detect.outputs.has_services == 'true' }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: ${{ fromJson(needs.detect.outputs.matrix) }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install YAML parser + run: python -m pip install --disable-pip-version-check pyyaml + - name: Run service checks + shell: bash + env: + SERVICE_NAME: ${{ matrix.name }} + run: | + python - <<'PY' + import os + import pathlib + import subprocess + import sys + import yaml + + service_name = os.environ["SERVICE_NAME"] + with 
open(".cicd/project.yaml", encoding="utf-8") as handle: + data = yaml.safe_load(handle) or {} + + svc = (data.get("services") or {}).get(service_name) or {} + service_path = pathlib.Path(svc.get("path") or ".") + commands = svc.get("commands") or {} + ordered_checks = ["install", "lint", "typecheck", "test", "build"] + summary = [f"## CI summary for `{service_name}`", ""] + + if not service_path.exists(): + raise SystemExit(f"Configured path does not exist for {service_name}: {service_path}") + + for check_name in ordered_checks: + command = commands.get(check_name) + if not command or str(command).startswith("TODO"): + reason = "missing command in .cicd/project.yaml" + summary.append(f"- {check_name}: skipped ({reason})") + continue + + summary.append(f"- {check_name}: running `{command}`") + result = subprocess.run(command, shell=True, cwd=service_path) + if result.returncode != 0: + summary.append(f"- {check_name}: failed with exit code {result.returncode}") + pathlib.Path(os.environ["GITHUB_STEP_SUMMARY"]).write_text("\n".join(summary) + "\n", encoding="utf-8") + sys.exit(result.returncode) + summary.append(f"- {check_name}: passed") + + pathlib.Path(os.environ["GITHUB_STEP_SUMMARY"]).write_text("\n".join(summary) + "\n", encoding="utf-8") + PY + + no-services: + needs: detect + if: ${{ needs.detect.outputs.has_services != 'true' }} + runs-on: ubuntu-latest + steps: + - name: Block empty CI configuration + run: | + echo "No enabled services were found in .cicd/project.yaml" >> "$GITHUB_STEP_SUMMARY" + exit 1 diff --git a/.github/workflows/cost-estimate.yml b/.github/workflows/cost-estimate.yml new file mode 100644 index 0000000000..30a2400000 --- /dev/null +++ b/.github/workflows/cost-estimate.yml @@ -0,0 +1,44 @@ +name: cost-estimate + +on: + workflow_call: + inputs: + deployment_target: + required: true + type: string + vcpu: + required: false + type: string + default: "1" + memory_gb: + required: false + type: string + default: "2" + +jobs: + estimate: + 
runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - name: Write cost estimate to job summary
+        run: |
+          PREV="${COST_ESTIMATE_PREVIOUS:-0}"
+          EST=".cicd/scripts/estimate.py"
+          if [ ! -f "$EST" ]; then EST="templates/lib/estimate.py"; fi
+          python "$EST" \
+            --target "${{ inputs.deployment_target }}" \
+            --cpu "${{ inputs.vcpu }}" \
+            --memory "${{ inputs.memory_gb }}" >> "$GITHUB_STEP_SUMMARY"
+          CUR=$(python "$EST" --target "${{ inputs.deployment_target }}" --cpu "${{ inputs.vcpu }}" --memory "${{ inputs.memory_gb }}" 2>/dev/null | grep -oP '\$\K[0-9.]+' | head -1 || echo "0")
+          if [ -n "$PREV" ] && [ "$PREV" != "0" ]; then
+            PREV="$PREV" CUR="$CUR" python - <<'PY'
+          import os
+          prev, cur = float(os.environ["PREV"]), float(os.environ.get("CUR") or 0)
+          print(f"Cost delta vs previous estimate: {cur - prev:+.2f} USD ({prev} -> {cur})")
+          PY
+          fi
+        env:
+          COST_ESTIMATE_PREVIOUS: ${{ vars.COST_ESTIMATE_PREVIOUS }}
diff --git a/.github/workflows/deploy-ecs.yml b/.github/workflows/deploy-ecs.yml
new file mode 100644
index 0000000000..6751d29822
--- /dev/null
+++ b/.github/workflows/deploy-ecs.yml
@@ -0,0 +1,116 @@
+name: deploy-ecs
+
+on:
+  workflow_dispatch:
+    inputs:
+      environment:
+        required: true
+        type: choice
+        options: [dev, staging, prod]
+      image_digest:
+        description: Immutable ECR digest (sha256:...)
+        required: true
+      release_id:
+        required: true
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    environment: ${{ inputs.environment }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_DEPLOY_ROLE_ARN }}
+          aws-region: ${{ vars.AWS_REGION || 'us-east-1' }}
+
+      - name: Capture current task definition (rollback baseline)
+        id: baseline
+        run: |
+          PREV=$(aws ecs describe-services --cluster "${{ vars.ECS_CLUSTER }}" --services "${{ vars.ECS_SERVICE }}" \
+            --query 'services[0].taskDefinition' --output text)
+          echo "previous_task_def=${PREV}" >> "$GITHUB_OUTPUT"
+
+      - name: Register task definition with immutable digest
+        id: register
+        env:
+          # Handed over via the environment so the workflow input is never spliced into script source.
+          IMAGE_URI: ${{ vars.ECR_REPOSITORY_URI }}@${{ inputs.image_digest }}
+        run: |
+          # Only accept a pinned sha256 digest reference.
+          if ! printf '%s' "$IMAGE_URI" | grep -Eq '@sha256:[0-9a-f]{64}$'; then
+            echo "image_digest must be sha256:<64 hex characters>" >&2
+            exit 1
+          fi
+          aws ecs describe-task-definition --task-definition "${{ vars.ECS_TASK_FAMILY }}" > task.json
+          python - <<'PY'
+          import json
+          import os
+          td = json.load(open("task.json"))["taskDefinition"]
+          td["containerDefinitions"][0]["image"] = os.environ["IMAGE_URI"]
+          for k in ("taskDefinitionArn","revision","status","requiresAttributes","compatibilities","registeredAt","registeredBy"):
+              td.pop(k, None)
+          json.dump(td, open("new-task.json","w"))
+          PY
+          NEW_ARN=$(aws ecs register-task-definition --cli-input-json file://new-task.json \
+            --query 'taskDefinition.taskDefinitionArn' --output text)
+          echo "task_def_arn=${NEW_ARN}" >> "$GITHUB_OUTPUT"
+
+      - name: Update ECS service
+        run: |
+          aws ecs update-service \
+            --cluster "${{ vars.ECS_CLUSTER }}" \
+            --service "${{ vars.ECS_SERVICE }}" \
+            --task-definition "${{ steps.register.outputs.task_def_arn }}" \
+            --force-new-deployment
+
+      - name: Wait for service stable
+        run: |
+          aws ecs wait services-stable \
+            --cluster "${{ vars.ECS_CLUSTER }}" \
+            --services "${{ vars.ECS_SERVICE }}"
+
+      - name: Verify target group health
+        run: |
+          # Fail on an API error or an empty target list as well as on any
+          # target that is not healthy; a bare grep pipeline lets both slip through.
+          STATES=$(aws elbv2 describe-target-health --target-group-arn "${{ vars.ALB_TARGET_GROUP_ARN }}" \
+            --query 'TargetHealthDescriptions[].TargetHealth.State' --output text)
+          if [ -z "$STATES" ] || [ "$STATES" = "None" ]; then
+            echo "No targets registered in the target group" >&2
+            exit 1
+          fi
+          for STATE in $STATES; do
+            if [ "$STATE" != "healthy" ]; then
+              echo "Target state is ${STATE}, expected healthy" >&2
+              exit 1
+            fi
+          done
+
+      - name: Rollback on failure
+        if: failure() && steps.baseline.outputs.previous_task_def != '' && steps.baseline.outputs.previous_task_def != 'None'
+        run: |
+          echo "Rolling back to ${{ steps.baseline.outputs.previous_task_def }}"
+          aws ecs update-service \
+            --cluster "${{ vars.ECS_CLUSTER }}" \
+            --service "${{ vars.ECS_SERVICE }}" \
+            --task-definition "${{ steps.baseline.outputs.previous_task_def }}"
+          # Let the rollback settle before the job finishes.
+          aws ecs wait services-stable \
+            --cluster "${{ vars.ECS_CLUSTER }}" \
+            --services "${{ vars.ECS_SERVICE }}"
+
+      - name: Cost estimate
+        run: |
+          echo "## Cost estimate (ECS Fargate)" >> "$GITHUB_STEP_SUMMARY"
+          EST=".cicd/scripts/estimate.py"
+          if [ -f "$EST" ]; then
+            python "$EST" --target ecs-fargate >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "Estimated monthly: ~\$80–\$200 (Fargate + ALB + NAT)" >> "$GITHUB_STEP_SUMMARY"
+          fi
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000000..fdf63798a2
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,207 @@
+name: release
+
+on:
+  workflow_dispatch:
+    inputs:
+      environment:
+        description: "dev | staging | prod"
+        required: true
+        default: staging
+        type: choice
+        options: [dev, staging, prod]
+      service_scope:
+        description: "frontend-only | backend-only | worker-only | all"
+        required: true
+        default: all
+      release_id:
+        description: "Immutable release id. Defaults to commit SHA when omitted."
+        required: false
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    environment: ${{ inputs.environment }}
+    outputs:
+      image_digest: ${{ steps.digest.outputs.image_digest }}
+      release_id: ${{ steps.digest.outputs.release_id }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - name: Install YAML parser
+        run: python -m pip install --disable-pip-version-check pyyaml
+      - uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_RELEASE_ROLE_ARN }}
+          aws-region: ${{ vars.AWS_REGION || 'us-east-1' }}
+      - uses: aws-actions/amazon-ecr-login@v2
+
+      - name: Build and push selected images
+        shell: bash
+        run: |
+          # RELEASE_ID comes from this step's env block (input or commit SHA);
+          # reading it from the environment avoids splicing the raw workflow
+          # input into the shell script.
+          RELEASE_ID="${RELEASE_ID:-$GITHUB_SHA}"
+          export RELEASE_ID
+          python - <<'PY'
+          import json
+          import os
+          import subprocess
+          import time
+          from pathlib import Path
+
+          import yaml
+
+          project = yaml.safe_load(open(".cicd/project.yaml", encoding="utf-8")) or {}
+          env_name = os.environ["DEPLOY_ENVIRONMENT"]
+          env_path = Path(f".cicd/env/{env_name}.yaml")
+          if not env_path.exists():
+              env_path = Path(f".cicd/env/{env_name}.yaml.example")
+          if not env_path.exists():
+              raise SystemExit(f"Missing environment config for {env_name}")
+
+          env_cfg = yaml.safe_load(env_path.read_text(encoding="utf-8")) or {}
+          scope = os.environ["SERVICE_SCOPE"]
+          release_id = os.environ["RELEASE_ID"]
+          aws_cfg = env_cfg.get("aws") or {}
+          region = aws_cfg.get("region") or os.environ.get("AWS_REGION") or "us-east-1"
+          account_id = str(aws_cfg.get("account_id") or "").strip()  # YAML parses an unquoted id as int
+          if not account_id or str(account_id).startswith("TODO"):
+              account_id = subprocess.check_output(
+                  ["aws", "sts", "get-caller-identity", "--query", "Account", "--output", "text"],
+                  text=True,
+              ).strip()
+          ecr_registry = f"{account_id}.dkr.ecr.{region}.amazonaws.com"
+          ecr_cfg = env_cfg.get("ecr") or {}
+          app_cfg = 
env_cfg.get("app") or {} + app_name = ( + app_cfg.get("name") + or (project.get("project") or {}).get("app_name") + or os.environ.get("GITHUB_REPOSITORY", "").split("/")[-1] + ) + if str(app_name).startswith("TODO"): + app_name = os.environ.get("GITHUB_REPOSITORY", "").split("/")[-1] + default_repo = f"{str(app_name).replace('_', '-').lower()}-{env_name}" + + def repo_for(name: str) -> str: + keys = { + "frontend": "frontend_repository", + "backend": "backend_repository", + "worker": "worker_repository", + "web": "repository", + } + env_key = keys.get(name, "repository") + return ( + ecr_cfg.get(env_key) + or ecr_cfg.get("repository") + or os.environ.get(f"ECR_REPOSITORY_{name.upper()}", "") + or os.environ.get("ECR_REPOSITORY", "") + or default_repo + ) + + def is_enabled(svc: dict) -> bool: + v = svc.get("enabled") + if v is True: + return True + if isinstance(v, str) and not v.startswith("TODO"): + return v.lower() in ("true", "yes", "1") + return False + + def scope_matches(name: str) -> bool: + if scope == "all": + return True + aliases = { + "frontend-only": ("frontend", "web"), + "backend-only": ("backend",), + "worker-only": ("worker",), + } + if scope in aliases: + return name in aliases[scope] + return scope == f"{name}-only" + + selected = [] + for name, svc in (project.get("services") or {}).items(): + if not svc or not is_enabled(svc): + continue + if not scope_matches(name): + continue + selected.append((name, svc)) + + if not selected: + raise SystemExit(f"No enabled services selected by scope {scope}") + + metadata = { + "schema_version": 1, + "release_id": release_id, + "environment": env_name, + "service_scope": scope, + "commit_sha": os.environ["GITHUB_SHA"], + "actor": os.environ["GITHUB_ACTOR"], + "run_id": os.environ["GITHUB_RUN_ID"], + "run_url": f"{os.environ['GITHUB_SERVER_URL']}/{os.environ['GITHUB_REPOSITORY']}/actions/runs/{os.environ['GITHUB_RUN_ID']}", + "created_at": int(time.time()), + "images": {}, + } + + for name, svc in selected: 
+              repository = repo_for(name)
+              # An empty value or an unfilled "TODO" placeholder means nothing usable was configured.
+              if not repository or str(repository).startswith("TODO"):
+                  raise SystemExit(f"Missing ECR repository variable for {name}")
+              # Accept either a full registry URI or a bare repository name.
+              repository_uri = repository if ".amazonaws.com/" in repository else f"{ecr_registry}/{repository}"
+
+              context = svc.get("build_context") or svc.get("path") or "."
+              dockerfile = svc.get("dockerfile") or f"{context}/Dockerfile"
+              image = f"{repository_uri}:{release_id}"
+              subprocess.run(["docker", "build", "-f", dockerfile, "-t", image, context], check=True)
+              subprocess.run(["docker", "push", image], check=True)
+
+              # Everything after the registry host is the repository name the ECR API expects.
+              repository_name = repository_uri.split("/", 1)[1]
+              digest = subprocess.check_output(
+                  ["aws", "ecr", "describe-images", "--repository-name", repository_name,
+                   "--image-ids", f"imageTag={release_id}", "--query", "imageDetails[0].imageDigest",
+                   "--output", "text"],
+                  text=True,
+              ).strip()
+              # With --output text the CLI prints "None" for a null query result; fail here
+              # instead of recording an unusable "<repo>@None" image URI.
+              if not digest or digest == "None":
+                  raise SystemExit(f"Could not resolve pushed image digest for {name} ({image})")
+
+              metadata["images"][name] = {
+                  "repository": repository_uri,
+                  "repository_name": repository_name,
+                  "tag": release_id,
+                  "image_tag": image,
+                  "digest": digest,
+                  "image_uri": f"{repository_uri}@{digest}",
+                  "dockerfile": dockerfile,
+                  "build_context": context,
+              }
+
+          # Use a context manager so the file is flushed and closed before later steps read it.
+          with open("promotion-metadata.json", "w", encoding="utf-8") as metadata_file:
+              metadata_file.write(json.dumps(metadata, indent=2))
+          PY
+        env:
+          DEPLOY_ENVIRONMENT: ${{ inputs.environment }}
+          SERVICE_SCOPE: ${{ inputs.service_scope }}
+          RELEASE_ID: ${{ inputs.release_id || github.sha }}
+          ECR_REPOSITORY_FRONTEND: ${{ vars.ECR_REPOSITORY_FRONTEND }}
+          ECR_REPOSITORY_BACKEND: ${{ vars.ECR_REPOSITORY_BACKEND }}
+          ECR_REPOSITORY_WORKER: ${{ vars.ECR_REPOSITORY_WORKER }}
+
+      - id: digest
+        # Pass the release id through the environment: expanding the expression
+        # directly inside the script would let a crafted input inject shell commands.
+        env:
+          RELEASE_ID: ${{ inputs.release_id || github.sha }}
+        run: |
+          DIGEST=$(python -c "import json; m=json.load(open('promotion-metadata.json')); imgs=m.get('images',{}); print(next(iter(imgs.values()))['digest'] if imgs else '')")
+          echo "image_digest=${DIGEST}" >> "$GITHUB_OUTPUT"
+          echo "release_id=${RELEASE_ID}" >> "$GITHUB_OUTPUT"
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: promotion-metadata
+          path:
promotion-metadata.json
+      - name: Summarize release
+        # The job summary is rendered as Markdown, so the JSON is wrapped in a code
+        # fence to keep its line breaks and indentation intact.
+        run: |
+          {
+            echo "## Release metadata"
+            echo ""
+            echo '```json'
+            cat promotion-metadata.json
+            # Ensure the closing fence starts on its own line even if the file has no trailing newline.
+            echo ""
+            echo '```'
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.gitignore b/.gitignore
index f903ab6066..d0c3e973c6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ node_modules/
 /playwright-report/
 /blob-report/
 /playwright/.cache/
+.cicd/.cache/