From 87988576ee42cc3869acf0c45762333bcd15d89c Mon Sep 17 00:00:00 2001 From: Alexander Amiri Date: Thu, 12 Mar 2026 15:31:10 +0100 Subject: [PATCH] Slack alert improvements, dedup, and CI workflow consolidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Slack alerts: - Fix hero provisioner formatting — always-table with Type/Email/Status - Add DynamoDB dedup for Security Hub + compliance alerts (30-day TTL) - Add "View & Suppress" link button on Security Hub alerts (console deep-link) - Add weekly Security Hub summary Lambda with table format (Monday 08:00 UTC) - Consolidate HIGH risk alert into single rich message with findings + override button - Remove duplicate alert from check-risk-block.sh CI consolidation: - Merge plan-review.yml into tf-plan.yml (saves job startup + S3 re-download) - Merge platform-ci review job inline into plan job (same pattern) - Delete unused commit-terraform.yml and plan-review.yml - Remove plan-review.yml from OIDC trust policy - Update ruleset required check (done via gh CLI separately) --- .github/workflows/commit-terraform.yml | 75 ------ .github/workflows/javabin.yml | 21 +- .github/workflows/plan-review.yml | 90 ------- .github/workflows/platform-ci.yml | 38 +-- .github/workflows/tf-plan.yml | 32 +++ scripts/check-risk-block.sh | 17 +- scripts/notify-high-risk.sh | 85 ++++++- terraform/lambda-src/slack_alert/handler.py | 223 +++++++++++++++++- .../lambda-src/team_provisioner/handler.py | 141 ++++++++++- terraform/platform/iam/main.tf | 3 +- terraform/platform/lambdas/main.tf | 65 +++++ terraform/platform/lambdas/variables.tf | 10 + terraform/platform/main.tf | 2 + terraform/platform/monitoring/main.tf | 24 ++ terraform/platform/monitoring/outputs.tf | 10 + 15 files changed, 587 insertions(+), 249 deletions(-) delete mode 100644 .github/workflows/commit-terraform.yml delete mode 100644 .github/workflows/plan-review.yml diff --git a/.github/workflows/commit-terraform.yml 
b/.github/workflows/commit-terraform.yml deleted file mode 100644 index ec726bc..0000000 --- a/.github/workflows/commit-terraform.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: Commit Generated Terraform - -# After a successful tf-apply, commit the generated Terraform files back to -# the app repo so developers can see and review the infrastructure code. -# Uses [skip ci] to prevent re-triggering the pipeline. - -on: - workflow_call: - inputs: - aws_account_id: - description: "AWS account ID" - type: string - default: "553637109631" - aws_region: - description: "AWS region" - type: string - default: "eu-central-1" - tf_root: - description: "Terraform root directory" - type: string - default: "terraform" - -permissions: - contents: write - -jobs: - commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v5 - with: - ref: ${{ github.ref }} - - - name: Check for app.yaml - id: check - run: echo "has_yaml=$(test -f app.yaml && echo true || echo false)" >> "$GITHUB_OUTPUT" - - - uses: hashicorp/setup-terraform@v4 - if: steps.check.outputs.has_yaml == 'true' - with: - terraform_version: "1.7" - terraform_wrapper: false - - - name: Generate GitHub App token - if: steps.check.outputs.has_yaml == 'true' - id: app-token - uses: actions/create-github-app-token@v1 - with: - app-id: ${{ secrets.PLATFORM_APP_ID }} - private-key: ${{ secrets.PLATFORM_APP_PRIVATE_KEY }} - owner: javaBin - - - name: Checkout platform scripts - if: steps.check.outputs.has_yaml == 'true' - uses: actions/checkout@v5 - with: - repository: javaBin/platform - token: ${{ steps.app-token.outputs.token }} - path: .platform - sparse-checkout: scripts - persist-credentials: false - - - name: Expand modules from app.yaml - if: steps.check.outputs.has_yaml == 'true' - env: - APP_SERVICE: ${{ github.event.repository.name }} - AWS_ACCOUNT_ID: ${{ inputs.aws_account_id }} - AWS_REGION: ${{ inputs.aws_region }} - TF_ROOT: ${{ inputs.tf_root }} - PLATFORM_ROOT: .platform - run: python3 
.platform/scripts/expand-modules.py - - - name: Commit and push generated files - if: steps.check.outputs.has_yaml == 'true' - run: sh .platform/scripts/commit-generated-tf.sh "${{ inputs.tf_root }}" "${{ github.ref_name }}" diff --git a/.github/workflows/javabin.yml b/.github/workflows/javabin.yml index c6316a6..203156b 100644 --- a/.github/workflows/javabin.yml +++ b/.github/workflows/javabin.yml @@ -44,7 +44,7 @@ jobs: # 3. Docker build + ECR push (main only, after builds pass) # -------------------------------------------------------------------------- docker-build: - needs: [detect, build-jvm, build-ts, tf-plan, plan-review, tf-apply] + needs: [detect, build-jvm, build-ts, tf-plan, tf-apply] if: | always() && github.ref == 'refs/heads/main' && @@ -52,7 +52,6 @@ jobs: (needs.build-jvm.result == 'success' || needs.build-jvm.result == 'skipped') && (needs.build-ts.result == 'success' || needs.build-ts.result == 'skipped') && needs.tf-plan.result != 'failure' && - needs.plan-review.result != 'failure' && (needs.tf-apply.result == 'success' || needs.tf-apply.result == 'skipped') uses: javaBin/platform/.github/workflows/docker-build.yml@main secrets: inherit @@ -79,25 +78,13 @@ jobs: secrets: inherit # -------------------------------------------------------------------------- - # 5. LLM plan review (only when plan has changes) - # -------------------------------------------------------------------------- - plan-review: - needs: tf-plan - if: needs.tf-plan.outputs.has_changes == 'true' - uses: javaBin/platform/.github/workflows/plan-review.yml@main - with: - plan_key: ${{ needs.tf-plan.outputs.plan_key }} - secrets: inherit - - # -------------------------------------------------------------------------- - # 6. Terraform apply (main only, after review — blocked on HIGH risk) + # 6. 
Terraform apply (main only, after plan + review — blocked on HIGH risk) # -------------------------------------------------------------------------- tf-apply: - needs: [tf-plan, plan-review] + needs: tf-plan if: >- github.ref == 'refs/heads/main' && - needs.tf-plan.outputs.has_changes == 'true' && - needs.plan-review.result == 'success' + needs.tf-plan.outputs.has_changes == 'true' uses: javaBin/platform/.github/workflows/tf-apply.yml@main with: plan_key: ${{ needs.tf-plan.outputs.plan_key }} diff --git a/.github/workflows/plan-review.yml b/.github/workflows/plan-review.yml deleted file mode 100644 index 6cd48b2..0000000 --- a/.github/workflows/plan-review.yml +++ /dev/null @@ -1,90 +0,0 @@ -name: Plan Review - -on: - workflow_call: - inputs: - plan_key: - description: "S3 key of the plan artifact" - type: string - required: true - aws_account_id: - description: "AWS account ID" - type: string - default: "553637109631" - aws_region: - description: "AWS region" - type: string - default: "eu-central-1" - outputs: - risk_level: - description: "Risk level from LLM review (LOW, MEDIUM, HIGH, FAILED)" - value: ${{ jobs.review.outputs.risk_level }} - -permissions: - id-token: write - contents: read - pull-requests: write - -jobs: - review: - runs-on: ubuntu-latest - outputs: - risk_level: ${{ steps.review.outputs.risk_level }} - env: - PLAN_BUCKET: javabin-ci-plan-artifacts-${{ inputs.aws_account_id }} - steps: - - name: Generate GitHub App token - id: app-token - uses: actions/create-github-app-token@v1 - with: - app-id: ${{ secrets.PLATFORM_APP_ID }} - private-key: ${{ secrets.PLATFORM_APP_PRIVATE_KEY }} - owner: javaBin - - - uses: actions/checkout@v5 - with: - repository: javaBin/platform - token: ${{ steps.app-token.outputs.token }} - ref: main - sparse-checkout: scripts - path: platform - - - name: Configure AWS credentials via OIDC - uses: aws-actions/configure-aws-credentials@v5 - with: - role-to-assume: arn:aws:iam::${{ inputs.aws_account_id 
}}:role/javabin-ci-app-${{ github.event.repository.name }} - aws-region: ${{ inputs.aws_region }} - - - name: Download plan text from S3 - run: aws s3 cp "s3://${PLAN_BUCKET}/$(dirname "${{ inputs.plan_key }}")/plan-output.txt" plan-output.txt - - - name: Run LLM review - id: review - env: - REVIEW_RESULT_PATH: review-result.json - run: sh platform/scripts/extract-review-risk.sh platform/scripts/review-plan.py plan-output.txt - - - name: Upload risk assessment to S3 - run: | - PLAN_DIR=$(dirname "${{ inputs.plan_key }}") - echo '{"level":"${{ steps.review.outputs.risk_level }}","reviewed_at":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' | \ - aws s3 cp - "s3://${PLAN_BUCKET}/${PLAN_DIR}/risk.json" --content-type application/json - - - name: Post review to PR - if: github.event_name == 'pull_request' - env: - GH_TOKEN: ${{ github.token }} - PR_NUMBER: ${{ github.event.pull_request.number }} - run: sh platform/scripts/post-review-comment.sh - - - name: Alert Slack on HIGH risk - if: github.event_name == 'push' && github.ref == 'refs/heads/main' && steps.review.outputs.risk_level == 'HIGH' - env: - SSM_WEBHOOK_PARAM: /javabin/slack/platform-override-alerts-webhook - PLAN_KEY: ${{ inputs.plan_key }} - run: | - OVERRIDE_URL="https://github.com/javaBin/platform/actions/workflows/approve-override.yml" - RUN_URL="https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" - sh platform/scripts/notify-slack.sh \ - "Deploy Blocked — HIGH Risk Plan" \ - "*Repo:* ${GITHUB_REPOSITORY}\n*Plan:* \`${PLAN_KEY}\`\n<${OVERRIDE_URL}|Approve Override> | <${RUN_URL}|View Run>" diff --git a/.github/workflows/platform-ci.yml b/.github/workflows/platform-ci.yml index 788d64e..82872a8 100644 --- a/.github/workflows/platform-ci.yml +++ b/.github/workflows/platform-ci.yml @@ -44,8 +44,8 @@ jobs: outputs: has_changes: ${{ steps.plan.outputs.has_changes }} plan_key: ${{ steps.upload.outputs.plan_key }} - plan_text_key: ${{ steps.upload.outputs.plan_text_key }} plan_sha256: ${{ 
steps.upload.outputs.plan_sha256 }} + risk_level: ${{ steps.review.outputs.risk_level }} steps: - uses: actions/checkout@v5 @@ -97,36 +97,18 @@ jobs: PR_NUMBER: ${{ github.event.pull_request.number }} run: sh scripts/post-plan-comment.sh "${{ env.TF_ROOT }}/plan-output.txt" "${{ steps.plan.outputs.has_changes }}" - # -------------------------------------------------------------------------- - # Review — LLM risk analysis via Bedrock - # -------------------------------------------------------------------------- - review: - name: LLM Plan Review - runs-on: ubuntu-latest - needs: plan - if: needs.plan.outputs.has_changes == 'true' - outputs: - risk_level: ${{ steps.review.outputs.risk_level }} - steps: - - uses: actions/checkout@v5 - - - uses: aws-actions/configure-aws-credentials@v5 - with: - role-to-assume: arn:aws:iam::${{ env.AWS_ACCOUNT_ID }}:role/javabin-ci-infra - aws-region: ${{ env.AWS_REGION }} - role-session-name: javabin-review-${{ github.run_id }} - - - name: Download plan text from S3 - run: mkdir -p "${{ env.TF_ROOT }}" && aws s3 cp "s3://${PLAN_BUCKET}/${{ needs.plan.outputs.plan_text_key }}" "${{ env.TF_ROOT }}/plan-output.txt" - + # ---------------------------------------------------------------- + # LLM Plan Review (inline — plan-output.txt already in workspace) + # ---------------------------------------------------------------- - name: Run LLM review id: review + if: steps.plan.outputs.has_changes == 'true' env: REVIEW_RESULT_PATH: review-result.json run: sh scripts/extract-review-risk.sh scripts/review-plan.py "${{ env.TF_ROOT }}/plan-output.txt" - name: Post review to PR - if: github.event_name == 'pull_request' + if: github.event_name == 'pull_request' && steps.plan.outputs.has_changes == 'true' env: GH_TOKEN: ${{ github.token }} PR_NUMBER: ${{ github.event.pull_request.number }} @@ -134,7 +116,7 @@ jobs: - name: Post HIGH risk to Slack if: steps.review.outputs.risk_level == 'HIGH' && github.ref == 'refs/heads/main' - run: sh 
scripts/notify-high-risk.sh /javabin/slack/platform-resource-alerts-webhook "https://github.com/javaBin/platform/actions/workflows/approve-override.yml" + run: sh scripts/notify-high-risk.sh /javabin/slack/platform-override-alerts-webhook "https://github.com/javaBin/platform/actions/workflows/approve-override.yml" # -------------------------------------------------------------------------- # Apply — auto-apply on LOW/MEDIUM, block on HIGH @@ -142,7 +124,7 @@ jobs: apply: name: Terraform Apply runs-on: ubuntu-latest - needs: [plan, review] + needs: plan if: >- github.ref == 'refs/heads/main' && github.event_name == 'push' && @@ -164,8 +146,8 @@ jobs: - name: Check risk level env: - RISK: ${{ needs.review.outputs.risk_level }} - run: sh scripts/check-risk-block.sh "$RISK" /javabin/slack/platform-override-alerts-webhook "https://github.com/${{ github.repository }}/actions/workflows/approve-override.yml" + RISK: ${{ needs.plan.outputs.risk_level }} + run: sh scripts/check-risk-block.sh "$RISK" - name: Download Lambda ZIPs from artifact uses: actions/download-artifact@v5 diff --git a/.github/workflows/tf-plan.yml b/.github/workflows/tf-plan.yml index d663d8b..f983dde 100644 --- a/.github/workflows/tf-plan.yml +++ b/.github/workflows/tf-plan.yml @@ -25,6 +25,9 @@ on: plan_sha256: description: "SHA256 hash of the plan artifact" value: ${{ jobs.plan.outputs.plan_sha256 }} + risk_level: + description: "Risk level from LLM review (LOW, MEDIUM, HIGH, FAILED)" + value: ${{ jobs.plan.outputs.risk_level }} permissions: id-token: write @@ -38,6 +41,7 @@ jobs: has_changes: ${{ steps.plan.outputs.has_changes }} plan_key: ${{ steps.upload.outputs.plan_key }} plan_sha256: ${{ steps.upload.outputs.plan_sha256 }} + risk_level: ${{ steps.review.outputs.risk_level }} env: PLAN_BUCKET: javabin-ci-plan-artifacts-${{ inputs.aws_account_id }} steps: @@ -99,3 +103,31 @@ jobs: GH_TOKEN: ${{ github.token }} PR_NUMBER: ${{ github.event.pull_request.number }} run: sh 
.platform/scripts/post-plan-comment.sh "${{ inputs.tf_root }}/plan-output.txt" "${{ steps.plan.outputs.has_changes }}" + + # ---------------------------------------------------------------- + # LLM Plan Review (runs in same job — plan-output.txt already here) + # ---------------------------------------------------------------- + - name: Run LLM review + id: review + if: steps.plan.outputs.has_changes == 'true' + env: + REVIEW_RESULT_PATH: review-result.json + run: sh .platform/scripts/extract-review-risk.sh .platform/scripts/review-plan.py "${{ inputs.tf_root }}/plan-output.txt" + + - name: Upload risk assessment to S3 + if: steps.plan.outputs.has_changes == 'true' && steps.review.outputs.risk_level != '' + run: | + PLAN_DIR=$(dirname "${{ steps.upload.outputs.plan_key }}") + echo '{"level":"${{ steps.review.outputs.risk_level }}","reviewed_at":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' | \ + aws s3 cp - "s3://${PLAN_BUCKET}/${PLAN_DIR}/risk.json" --content-type application/json + + - name: Post review to PR + if: github.event_name == 'pull_request' && steps.plan.outputs.has_changes == 'true' + env: + GH_TOKEN: ${{ github.token }} + PR_NUMBER: ${{ github.event.pull_request.number }} + run: sh .platform/scripts/post-review-comment.sh + + - name: Alert Slack on HIGH risk + if: github.event_name == 'push' && github.ref == 'refs/heads/main' && steps.review.outputs.risk_level == 'HIGH' + run: sh .platform/scripts/notify-high-risk.sh /javabin/slack/platform-override-alerts-webhook "https://github.com/javaBin/platform/actions/workflows/approve-override.yml" diff --git a/scripts/check-risk-block.sh b/scripts/check-risk-block.sh index 12ae7be..7f518b4 100644 --- a/scripts/check-risk-block.sh +++ b/scripts/check-risk-block.sh @@ -1,16 +1,15 @@ #!/bin/sh -# Block apply if risk is HIGH or FAILED. Notify Slack with override link. +# Block apply if risk is HIGH or FAILED. # -# Usage: check-risk-block.sh +# Usage: check-risk-block.sh # # Exits 0 if safe to apply, 1 if blocked. 
+# Slack notification is handled by notify-high-risk.sh in the review step — +# no duplicate alert here. set -e RISK="$1" -SSM_PARAM="$2" -OVERRIDE_URL="$3" -SCRIPT_DIR=$(dirname "$0") echo "LLM review risk: ${RISK}" @@ -19,12 +18,4 @@ if [ "$RISK" != "HIGH" ] && [ "$RISK" != "FAILED" ] && [ -n "$RISK" ]; then fi echo "Auto-apply blocked (risk=${RISK})." - -export SSM_WEBHOOK_PARAM="$SSM_PARAM" -sh "$SCRIPT_DIR/notify-slack.sh" \ - "Deploy Blocked — ${RISK} Risk" \ - "*Repo:* ${GITHUB_REPOSITORY}\n*SHA:* \`$(echo "$GITHUB_SHA" | cut -c1-8)\`\n*Actor:* ${GITHUB_ACTOR}" \ - "$OVERRIDE_URL" \ - "Approve Override" || true - exit 1 diff --git a/scripts/notify-high-risk.sh b/scripts/notify-high-risk.sh index 36e110a..f7faefc 100644 --- a/scripts/notify-high-risk.sh +++ b/scripts/notify-high-risk.sh @@ -1,21 +1,88 @@ #!/bin/sh -# Notify Slack about a HIGH risk plan review with override link. +# Notify Slack about a HIGH risk plan review with full findings and override link. # # Usage: notify-high-risk.sh # # Reads review-result.json from current directory. +# Uses GitHub Actions env vars for repo context (GITHUB_REPOSITORY, GITHUB_SHA, GITHUB_ACTOR). set -e SSM_PARAM="$1" OVERRIDE_URL="$2" -SCRIPT_DIR=$(dirname "$0") -SUMMARY=$(jq -r '.summary // "Review failed"' review-result.json 2>/dev/null || echo "Review failed") +# Resolve webhook URL from SSM +SLACK_WEBHOOK_URL=$(aws ssm get-parameter \ + --name "$SSM_PARAM" \ + --with-decryption --query Parameter.Value --output text 2>/dev/null || echo "") -export SSM_WEBHOOK_PARAM="$SSM_PARAM" -sh "$SCRIPT_DIR/notify-slack.sh" \ - "Deploy Blocked — HIGH Risk Plan" \ - "*Summary:* ${SUMMARY}" \ - "$OVERRIDE_URL" \ - "Approve Override" +if [ -z "$SLACK_WEBHOOK_URL" ]; then + echo "No webhook URL available — skipping Slack notification." 
+ exit 0 +fi + +# Extract review data +SUMMARY=$(jq -r '.summary // "Review unavailable"' review-result.json 2>/dev/null || echo "Review unavailable") +RISK=$(jq -r '.risk // "HIGH"' review-result.json 2>/dev/null || echo "HIGH") + +# Build findings blocks as JSON array entries +FINDINGS_TEXT="" +if [ -f review-result.json ]; then + FINDINGS_TEXT=$(python3 -c " +import json, sys +try: + data = json.load(open('review-result.json')) + cat_emoji = {'security': ':lock:', 'cost': ':moneybag:', 'destruction': ':boom:', 'routine': ':white_check_mark:'} + lines = [] + for f in data.get('findings', []): + emoji = cat_emoji.get(f.get('category', ''), ':black_small_square:') + lines.append(f\"{emoji} *[{f.get('category', 'other')}]* {f.get('detail', '')}\") + print('\n'.join(lines)) +except Exception: + pass +" 2>/dev/null || echo "") +fi + +# Repo context from GitHub Actions +REPO="${GITHUB_REPOSITORY:-unknown}" +SHA=$(echo "${GITHUB_SHA:-unknown}" | cut -c1-8) +ACTOR="${GITHUB_ACTOR:-unknown}" +RUN_ID="${GITHUB_RUN_ID:-}" + +# Build source line +if [ -n "$RUN_ID" ] && [ "$REPO" != "unknown" ]; then + SOURCE_LINE=":robot_face: / / (triggered by ${ACTOR})" +else + SOURCE_LINE=":bust_in_silhouette: ${REPO} / \`${SHA}\` / ${ACTOR}" +fi + +# Build the Block Kit payload +# Note: findings section only included if there are findings +FINDINGS_BLOCK="" +if [ -n "$FINDINGS_TEXT" ]; then + # Escape for JSON + ESCAPED_FINDINGS=$(echo "$FINDINGS_TEXT" | python3 -c "import sys,json; print(json.dumps(sys.stdin.read())[1:-1])") + FINDINGS_BLOCK=",{\"type\":\"section\",\"text\":{\"type\":\"mrkdwn\",\"text\":\"*Findings*\n${ESCAPED_FINDINGS}\"}}" +fi + +ESCAPED_SUMMARY=$(echo "$SUMMARY" | python3 -c "import sys,json; print(json.dumps(sys.stdin.read().strip())[1:-1])") +ESCAPED_SOURCE=$(echo "$SOURCE_LINE" | python3 -c "import sys,json; print(json.dumps(sys.stdin.read().strip())[1:-1])") + +PAYLOAD=$(cat < /dev/null +echo "Slack notification sent: HIGH risk plan blocked" diff --git 
a/terraform/lambda-src/slack_alert/handler.py b/terraform/lambda-src/slack_alert/handler.py index f594d75..7f80220 100644 --- a/terraform/lambda-src/slack_alert/handler.py +++ b/terraform/lambda-src/slack_alert/handler.py @@ -1,3 +1,4 @@ +import hashlib import json import logging import os @@ -15,6 +16,7 @@ logger.setLevel(logging.INFO) ssm = boto3.client("ssm") +dynamodb = boto3.resource("dynamodb") # SSM parameter names passed via environment INFRA_WEBHOOK_PARAM = os.environ["INFRA_WEBHOOK_PARAM"] @@ -22,10 +24,50 @@ SECURITY_TOPIC_ARN = os.environ["SECURITY_TOPIC_ARN"] PROJECT_PREFIX = os.environ.get("PROJECT_PREFIX", "javabin") GITHUB_ORG_URL = os.environ.get("GITHUB_ORG_URL", "https://github.com/javaBin") +DEDUP_TABLE_NAME = os.environ.get("DEDUP_TABLE_NAME", "javabin-alert-dedup") # Dedup cache: "{action}:{resource}" -> timestamp _recent_alerts = {} DEDUP_WINDOW = 300 # 5 minutes +DEDUP_TTL_DAYS = 30 + + +# --------------------------------------------------------------------------- +# DynamoDB dedup — persistent dedup for Security Hub + compliance alerts +# --------------------------------------------------------------------------- +def _dedup_table(): + return dynamodb.Table(DEDUP_TABLE_NAME) + + +def _finding_dedup_key(finding): + """Build a dedup key from a Security Hub finding.""" + resources = finding.get("Resources", []) + resource_arn = resources[0].get("Id", "unknown") if resources else "unknown" + title_hash = hashlib.sha256(finding.get("Title", "").encode()).hexdigest()[:12] + return f"{resource_arn}:{title_hash}" + + +def is_finding_already_alerted(dedup_key): + """Check DynamoDB for an existing alert entry.""" + try: + resp = _dedup_table().get_item(Key={"finding_key": dedup_key}) + return "Item" in resp + except Exception as e: + logger.warning("DynamoDB dedup check failed (allowing alert): %s", e) + return False + + +def record_finding_alert(dedup_key): + """Write dedup entry with TTL.""" + try: + expires_at = int(time.time()) + (DEDUP_TTL_DAYS 
* 86400) + _dedup_table().put_item(Item={ + "finding_key": dedup_key, + "alerted_at": datetime.now(timezone.utc).isoformat(), + "expires_at": expires_at, + }) + except Exception as e: + logger.warning("DynamoDB dedup write failed: %s", e) # --------------------------------------------------------------------------- # Console URL builder — derives service from eventSource, minimal overrides @@ -602,8 +644,15 @@ def format_compliance_alert(parsed): if PROJECT_PREFIX not in resource_id.lower(): return None - compliance = detail.get("newEvaluationResult", {}).get("complianceType", "UNKNOWN") + # DynamoDB dedup for compliance alerts rule_name = detail.get("configRuleName", "unknown") + dedup_key = f"compliance:{resource_id}:{rule_name}" + if is_finding_already_alerted(dedup_key): + logger.info("Compliance alert suppressed (already alerted): %s", dedup_key) + return None + record_finding_alert(dedup_key) + + compliance = detail.get("newEvaluationResult", {}).get("complianceType", "UNKNOWN") short_type = resource_type.replace("AWS::", "").replace("::", " ") blocks = [ @@ -780,7 +829,7 @@ def format_guardduty_finding(parsed): def format_securityhub_finding(parsed): - """Security Hub finding alert — Block Kit.""" + """Security Hub finding alert — Block Kit with DynamoDB dedup and Acknowledge button.""" findings = parsed.get("detail", {}).get("findings", []) if not findings: return None @@ -791,6 +840,14 @@ def format_securityhub_finding(parsed): description = finding.get("Description", "") region = parsed.get("region", "unknown") account = parsed.get("account", "unknown") + finding_id = finding.get("Id", "") + + # DynamoDB dedup — suppress if already alerted for this resource+finding + dedup_key = _finding_dedup_key(finding) + if is_finding_already_alerted(dedup_key): + logger.info("Security Hub finding suppressed (already alerted): %s", dedup_key) + return None + record_finding_alert(dedup_key) sev_emoji = { "CRITICAL": ":red_circle:", @@ -802,8 +859,6 @@ def 
format_securityhub_finding(parsed): resources = finding.get("Resources", []) resource_ids = [r.get("Id", "unknown").split("/")[-1] for r in resources[:3]] - sh_url = f"https://{region}.console.aws.amazon.com/securityhub/home?region={region}#/findings" - fields = [ {"type": "mrkdwn", "text": f"*Severity*\n{sev_emoji} {severity_label}"}, {"type": "mrkdwn", "text": f"*Resources*\n{', '.join(resource_ids)}"}, @@ -823,10 +878,21 @@ def format_securityhub_finding(parsed): "text": {"type": "mrkdwn", "text": f"_{description[:500]}_"} }) + # Link button to view & suppress in Security Hub console + encoded_id = finding_id.replace("/", "%2F").replace(":", "%3A") + sh_finding_url = ( + f"https://{region}.console.aws.amazon.com/securityhub/home" + f"?region={region}#/findings?search=Id%3D{encoded_id}" + ) blocks.append({ - "type": "section", - "text": {"type": "mrkdwn", "text": f"<{sh_url}|View in Security Hub>"} + "type": "actions", + "elements": [{ + "type": "button", + "text": {"type": "plain_text", "text": "View & Suppress in Security Hub", "emoji": True}, + "url": sh_finding_url, + }], }) + blocks.append({"type": "divider"}) blocks.append({ "type": "context", @@ -911,6 +977,151 @@ def format_budget_alert(parsed): return {"blocks": blocks, "text": fallback} +# --------------------------------------------------------------------------- +# Weekly Security Hub summary +# --------------------------------------------------------------------------- +def _extract_resource_type(finding): + """Extract a short resource type from a Security Hub finding.""" + resources = finding.get("Resources", []) + if not resources: + return "Unknown" + # Type looks like "AwsS3Bucket", "AwsEc2SecurityGroup", etc. 
+ raw = resources[0].get("Type", "Other") + # Strip "Aws" prefix and insert spaces before capitals + name = raw.removeprefix("Aws") + return re.sub(r"(?<=[a-z])(?=[A-Z])", " ", name) + + +def _severity_cell(label): + """Build a rich_text table cell with emoji for severity.""" + emoji_name = "red_circle" if label == "CRITICAL" else "large_orange_circle" + return { + "type": "rich_text", + "elements": [{ + "type": "rich_text_section", + "elements": [ + {"type": "emoji", "name": emoji_name}, + {"type": "text", "text": f" {label}"}, + ], + }], + } + + +def format_securityhub_summary(): + """Query Security Hub for active HIGH/CRITICAL findings and format a table summary.""" + securityhub = boto3.client("securityhub", region_name=DEPLOY_REGION) + + findings = [] + next_token = None + while True: + kwargs = { + "Filters": { + "SeverityLabel": [ + {"Value": "HIGH", "Comparison": "EQUALS"}, + {"Value": "CRITICAL", "Comparison": "EQUALS"}, + ], + "WorkflowStatus": [ + {"Value": "NEW", "Comparison": "EQUALS"}, + {"Value": "NOTIFIED", "Comparison": "EQUALS"}, + ], + "RecordState": [ + {"Value": "ACTIVE", "Comparison": "EQUALS"}, + ], + }, + "MaxResults": 100, + } + if next_token: + kwargs["NextToken"] = next_token + resp = securityhub.get_findings(**kwargs) + findings.extend(resp.get("Findings", [])) + next_token = resp.get("NextToken") + if not next_token: + break + + if not findings: + return None + + # Sort: CRITICAL first, then HIGH, then by resource + sev_order = {"CRITICAL": 0, "HIGH": 1} + findings.sort(key=lambda f: ( + sev_order.get(f.get("Severity", {}).get("Label", "HIGH"), 1), + f.get("Resources", [{}])[0].get("Id", "") if f.get("Resources") else "", + )) + + # Build table rows — header + one row per finding + header_row = [ + {"type": "raw_text", "text": "Severity"}, + {"type": "raw_text", "text": "Type"}, + {"type": "raw_text", "text": "Resource"}, + {"type": "raw_text", "text": "Finding"}, + ] + + rows = [header_row] + for f in findings[:50]: # Cap at 50 rows to 
stay within Block Kit limits + sev_label = f.get("Severity", {}).get("Label", "HIGH") + resource_type = _extract_resource_type(f) + resources = f.get("Resources", []) + resource_id = resources[0].get("Id", "unknown").split("/")[-1] if resources else "unknown" + title = f.get("Title", "Unknown") + + rows.append([ + _severity_cell(sev_label), + {"type": "raw_text", "text": resource_type}, + {"type": "raw_text", "text": resource_id}, + {"type": "raw_text", "text": title[:120]}, + ]) + + region = DEPLOY_REGION + sh_url = f"https://{region}.console.aws.amazon.com/securityhub/home?region={region}#/findings" + ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC") + + blocks = [ + {"type": "header", + "text": {"type": "plain_text", + "text": "Weekly Security Hub Summary", "emoji": True}}, + {"type": "section", + "text": {"type": "mrkdwn", + "text": f"*{len(findings)} active HIGH/CRITICAL findings*"}}, + { + "type": "table", + "column_settings": [ + {"align": "center"}, + {"is_wrapped": True}, + {"is_wrapped": True}, + {"is_wrapped": True}, + ], + "rows": rows, + }, + {"type": "section", + "text": {"type": "mrkdwn", + "text": f"<{sh_url}|View all findings in Security Hub>"}}, + {"type": "divider"}, + {"type": "context", + "elements": [{"type": "mrkdwn", "text": f"Weekly summary | {ts}"}]}, + ] + + return { + "blocks": blocks, + "text": f"Weekly Security Hub Summary: {len(findings)} active findings", + } + + +def summary_handler(event, context): + """Lambda handler for the weekly Security Hub summary.""" + webhook_url = _get_webhook(INFRA_WEBHOOK_PARAM) + try: + result = format_securityhub_summary() + if result: + post_to_slack(webhook_url, result) + logger.info("Posted weekly Security Hub summary") + else: + logger.info("No active HIGH/CRITICAL findings — skipping summary") + except Exception as e: + logger.error("Weekly summary failed: %s", e) + + return {"statusCode": 200} + + # --------------------------------------------------------------------------- # Main 
handler # --------------------------------------------------------------------------- diff --git a/terraform/lambda-src/team_provisioner/handler.py b/terraform/lambda-src/team_provisioner/handler.py index 95f81bb..4908b0c 100644 --- a/terraform/lambda-src/team_provisioner/handler.py +++ b/terraform/lambda-src/team_provisioner/handler.py @@ -1473,25 +1473,144 @@ def _result_line(provider, outcome): reason = outcome.get("reason", "skipped") return f":white_circle: {provider}: {reason}" if outcome.get("error"): - return f":red_circle: {provider}: {outcome['error']}" - return f":white_check_mark: {provider}: synced" + return f":red_circle: {provider}: {outcome['error'][:200]}" + # Include detail counts when available (google group sync) + parts = [f":white_check_mark: {provider}: synced"] + details = [] + if outcome.get("added"): + details.append(f"+{outcome['added']} added") + if outcome.get("member_count"): + details.append(f"{outcome['member_count']} total") + if outcome.get("extra_preserved"): + details.append(f"{outcome['extra_preserved']} manual") + if details: + parts.append(f"({', '.join(details)})") + return " ".join(parts) + + +def _status_cell(status): + """Build a rich_text table cell with emoji for account/alias status.""" + emoji_map = { + "created": ("white_check_mark", "Created"), + "existed": ("heavy_minus_sign", "Existed"), + "failed": ("x", "Failed"), + } + emoji_name, label = emoji_map.get(status, ("question", status)) + return { + "type": "rich_text", + "elements": [{ + "type": "rich_text_section", + "elements": [ + {"type": "emoji", "name": emoji_name}, + {"type": "text", "text": f" {label}"}, + ], + }], + } + + +def _hero_sync_table(accounts, aliases): + """Build a single table for all hero account + alias provisioning. + + Combines accounts and aliases into one table. Returns None if nothing happened. + Only 1 table block allowed per Slack message. 
+ """ + created = accounts.get("created", []) + existed = accounts.get("existed", []) + failed = accounts.get("failed", []) + aliases_created = aliases.get("created", []) if aliases else [] + aliases_failed = aliases.get("failed", []) if aliases else [] + + if not any([created, existed, failed, aliases_created, aliases_failed]): + return None + + header = [ + {"type": "raw_text", "text": "Type"}, + {"type": "raw_text", "text": "Email / Alias"}, + {"type": "raw_text", "text": "Status"}, + ] + rows = [header] + + for email in created: + rows.append([ + {"type": "raw_text", "text": "Account"}, + {"type": "raw_text", "text": email}, + _status_cell("created"), + ]) + for email in existed: + rows.append([ + {"type": "raw_text", "text": "Account"}, + {"type": "raw_text", "text": email}, + _status_cell("existed"), + ]) + for entry in failed: + email = entry.get("email", "?") + error = entry.get("error", "unknown")[:80] + rows.append([ + {"type": "raw_text", "text": "Account"}, + {"type": "raw_text", "text": f"{email} ({error})"}, + _status_cell("failed"), + ]) + for entry in aliases_created: + rows.append([ + {"type": "raw_text", "text": "Alias"}, + {"type": "raw_text", "text": f"{entry.get('alias', '?')} -> {entry.get('user', '?')}"}, + _status_cell("created"), + ]) + for entry in aliases_failed: + rows.append([ + {"type": "raw_text", "text": "Alias"}, + {"type": "raw_text", "text": f"{entry.get('alias', '?')} ({entry.get('error', '')[:60]})"}, + _status_cell("failed"), + ]) + + return { + "type": "table", + "column_settings": [ + {"align": "center"}, + {"is_wrapped": True}, + {"align": "center"}, + ], + "rows": rows[:51], # Block Kit limit + } def notify_slack(results): - """Post provisioning summary to Slack.""" + """Post provisioning summary to Slack. 
+
+    Handles two result structures:
+    - Hero sync: results contains top-level 'accounts' and 'aliases' keys
+      alongside per-group results
+    - Team sync: results maps team_name -> {provider: outcome}
+
+    Uses a table for hero account/alias results. Omits sections with no content.
+    """
     webhook_url = get_webhook_url(ssm, INFRA_WEBHOOK_PARAM)
 
+    # Detect hero sync by the 'accounts' key; copy rather than pop() so the caller's dict is not mutated
+    accounts, aliases = results.get("accounts"), results.get("aliases")
+    results = {k: v for k, v in results.items() if k not in ("accounts", "aliases")}
+    is_hero_sync = accounts is not None
+
+    header_text = (
+        ":busts_in_silhouette: Hero & Group Sync Complete"
+        if is_hero_sync
+        else ":busts_in_silhouette: Team Provisioning Complete"
+    )
+
     blocks = [
         {
             "type": "header",
-            "text": {
-                "type": "plain_text",
-                "text": ":busts_in_silhouette: Team Provisioning Complete",
-                "emoji": True,
-            },
+            "text": {"type": "plain_text", "text": header_text, "emoji": True},
         }
     ]
 
+    # Hero-specific: single table for accounts + aliases
+    if accounts is not None:
+        table = _hero_sync_table(accounts, aliases)
+        if table:
+            blocks.append(table)
+
+    # Per-group / per-team results
     for team_name, result in results.items():
         lines = [_result_line(p, o) for p, o in result.items()]
         blocks.append(
@@ -1521,7 +1640,11 @@ def notify_slack(results):
         webhook_url,
         {
             "blocks": blocks,
-            "text": f"Team provisioning: {', '.join(results.keys())}",
+            "text": (
+                f"Hero sync: {', '.join(results.keys())}"
+                if is_hero_sync
+                else f"Team provisioning: {', '.join(results.keys())}"
+            ),
         },
     )
 
diff --git a/terraform/platform/iam/main.tf b/terraform/platform/iam/main.tf
index 2d31b34..9a2b0e1 100644
--- a/terraform/platform/iam/main.tf
+++ b/terraform/platform/iam/main.tf
@@ -170,10 +170,9 @@ resource "aws_iam_role" "ci_app" {
           }
           StringLike = {
             "token.actions.githubusercontent.com:sub" = "repo:${var.github_org}/${each.key}:*"
-            # Plan and review only — apply goes through the gate Lambda
+            # Plan + review (merged into tf-plan) — apply goes through the gate Lambda
"token.actions.githubusercontent.com:job_workflow_ref" = [ "${var.github_org}/platform/.github/workflows/tf-plan.yml@refs/heads/main", - "${var.github_org}/platform/.github/workflows/plan-review.yml@refs/heads/main", ] } } diff --git a/terraform/platform/lambdas/main.tf b/terraform/platform/lambdas/main.tf index 98e8d55..354c395 100644 --- a/terraform/platform/lambdas/main.tf +++ b/terraform/platform/lambdas/main.tf @@ -174,6 +174,22 @@ resource "aws_iam_role_policy" "slack_alert" { # Pricing API is global and only supports * for resource Resource = "*" }, + { + Sid = "DynamoDBDedup" + Effect = "Allow" + Action = [ + "dynamodb:GetItem", + "dynamodb:PutItem", + ] + Resource = var.alert_dedup_table_arn + }, + { + Sid = "SecurityHubRead" + Effect = "Allow" + Action = "securityhub:GetFindings" + # Security Hub requires * for resource + Resource = "*" + }, ] }) } @@ -505,6 +521,7 @@ resource "aws_lambda_function" "slack_alert" { PROJECT_PREFIX = var.project GITHUB_ORG_URL = local.github_org_url DEPLOY_REGION = var.region + DEDUP_TABLE_NAME = var.alert_dedup_table_name } } } @@ -746,6 +763,54 @@ resource "aws_lambda_permission" "override_cleanup_schedule" { # --- team-provisioner: no schedule — triggered via direct Lambda invocation from registry CI --- +# --- securityhub-summary: Monday 08:00 UTC (same as cost report) --- +resource "aws_cloudwatch_event_rule" "securityhub_summary_schedule" { + name = "${var.project}-securityhub-summary-schedule" + description = "Weekly Security Hub summary — Monday 08:00 UTC" + schedule_expression = "cron(0 8 ? 
* MON *)" +} + +resource "aws_cloudwatch_event_target" "securityhub_summary" { + rule = aws_cloudwatch_event_rule.securityhub_summary_schedule.name + target_id = "invoke-securityhub-summary" + arn = aws_lambda_function.securityhub_summary.arn +} + +resource "aws_lambda_permission" "securityhub_summary_schedule" { + statement_id = "AllowEventBridge" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.securityhub_summary.function_name + principal = "events.amazonaws.com" + source_arn = aws_cloudwatch_event_rule.securityhub_summary_schedule.arn +} + +################################################################################ +# Security Hub Summary — weekly summary Lambda (reuses slack-alert code) +################################################################################ + +resource "aws_lambda_function" "securityhub_summary" { + function_name = "${var.project}-securityhub-summary" + role = aws_iam_role.slack_alert.arn + handler = "handler.summary_handler" + runtime = "python3.12" + timeout = 60 + memory_size = 256 + filename = data.archive_file.slack_alert.output_path + source_code_hash = data.archive_file.slack_alert.output_base64sha256 + + environment { + variables = { + INFRA_WEBHOOK_PARAM = "/javabin/slack/platform-resource-alerts-webhook" + COST_WEBHOOK_PARAM = "/javabin/slack/platform-cost-alerts-webhook" + SECURITY_TOPIC_ARN = var.security_topic_arn + PROJECT_PREFIX = var.project + GITHUB_ORG_URL = local.github_org_url + DEPLOY_REGION = var.region + DEDUP_TABLE_NAME = var.alert_dedup_table_name + } + } +} + ################################################################################ # Apply Gate — credential broker for Terraform apply ################################################################################ diff --git a/terraform/platform/lambdas/variables.tf b/terraform/platform/lambdas/variables.tf index a3a433e..aeeb0c1 100644 --- a/terraform/platform/lambdas/variables.tf +++ 
b/terraform/platform/lambdas/variables.tf @@ -50,3 +50,13 @@ variable "sso_instance_arn" { default = "" } +variable "alert_dedup_table_name" { + description = "DynamoDB table name for alert deduplication" + type = string +} + +variable "alert_dedup_table_arn" { + description = "DynamoDB table ARN for alert deduplication" + type = string +} + diff --git a/terraform/platform/main.tf b/terraform/platform/main.tf index a9e1718..b40a75e 100644 --- a/terraform/platform/main.tf +++ b/terraform/platform/main.tf @@ -64,6 +64,8 @@ module "lambdas" { internal_user_pool_arn = module.identity.internal_user_pool_arn identity_store_id = var.identity_store_id sso_instance_arn = var.sso_instance_arn + alert_dedup_table_name = module.monitoring.alert_dedup_table_name + alert_dedup_table_arn = module.monitoring.alert_dedup_table_arn } module "identity" { diff --git a/terraform/platform/monitoring/main.tf b/terraform/platform/monitoring/main.tf index 7fe8a9a..c5f461a 100644 --- a/terraform/platform/monitoring/main.tf +++ b/terraform/platform/monitoring/main.tf @@ -465,3 +465,27 @@ resource "aws_cloudwatch_event_target" "securityhub_findings_sns" { target_id = "send-to-security-sns" arn = aws_sns_topic.security.arn } + +################################################################################ +# DynamoDB — alert dedup table (Security Hub + compliance) +################################################################################ + +resource "aws_dynamodb_table" "alert_dedup" { + name = "${var.project}-alert-dedup" + billing_mode = "PAY_PER_REQUEST" + hash_key = "finding_key" + + attribute { + name = "finding_key" + type = "S" + } + + ttl { + attribute_name = "expires_at" + enabled = true + } + + tags = { + Name = "${var.project}-alert-dedup" + } +} diff --git a/terraform/platform/monitoring/outputs.tf b/terraform/platform/monitoring/outputs.tf index 0194a34..a8e1be2 100644 --- a/terraform/platform/monitoring/outputs.tf +++ b/terraform/platform/monitoring/outputs.tf @@ -7,3 
+7,13 @@ output "security_topic_arn" { description = "ARN of the security SNS topic (IAM changes, GuardDuty, Config, Security Hub)" value = aws_sns_topic.security.arn } + +output "alert_dedup_table_name" { + description = "DynamoDB table name for alert deduplication" + value = aws_dynamodb_table.alert_dedup.name +} + +output "alert_dedup_table_arn" { + description = "DynamoDB table ARN for alert deduplication" + value = aws_dynamodb_table.alert_dedup.arn +}