diff --git a/k8s/deployment/publish_alb_metrics b/k8s/deployment/publish_alb_metrics
new file mode 100755
index 00000000..fd695b77
--- /dev/null
+++ b/k8s/deployment/publish_alb_metrics
@@ -0,0 +1,188 @@
+#!/bin/bash
+# Post-deployment ALB metrics publisher
+#
+# Publishes the ALB listener rule count and target group count as custom
+# metrics to CloudWatch or Datadog for continuous monitoring and alerting.
+#
+# This file is meant to be sourced (it uses top-level 'return') and expects
+# the caller to provide the 'log' function and the CONTEXT variable. It is
+# best-effort: failures are logged and the script still returns 0 so that a
+# metrics problem never fails the deployment.
+#
+# Inputs:
+#   CONTEXT                      JSON with .alb_name and .region
+#   ALB_METRICS_PUBLISH_ENABLED  "true" enables publishing (default "false")
+#   ALB_METRICS_PUBLISH_TARGET   cloudwatch | datadog (default "cloudwatch")
+#   DATADOG_API_KEY              required when the target is datadog
+#   DATADOG_SITE                 Datadog site (default "datadoghq.com")
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+source "$SCRIPT_DIR/../utils/get_config_value"
+
+ALB_METRICS_PUBLISH_ENABLED=$(get_config_value \
+  --env ALB_METRICS_PUBLISH_ENABLED \
+  --provider '.providers["scope-configurations"].networking.alb_metrics_enabled' \
+  --default "false"
+)
+
+ALB_METRICS_PUBLISH_TARGET=$(get_config_value \
+  --env ALB_METRICS_PUBLISH_TARGET \
+  --provider '.providers["scope-configurations"].networking.alb_metrics_target' \
+  --default "cloudwatch"
+)
+
+if [ "$ALB_METRICS_PUBLISH_ENABLED" != "true" ]; then
+  return 0
+fi
+
+ALB_NAME=$(echo "$CONTEXT" | jq -r '.alb_name')
+REGION=$(echo "$CONTEXT" | jq -r '.region')
+
+if [ -z "$ALB_NAME" ] || [ "$ALB_NAME" = "null" ]; then
+  log warn "⚠️ ALB metrics: ALB name not found in context"
+  return 0
+fi
+
+# Without a region every AWS call below would fail with an unrelated error.
+if [ -z "$REGION" ] || [ "$REGION" = "null" ]; then
+  log warn "⚠️ ALB metrics: region not found in context"
+  return 0
+fi
+
+# Resolve ALB ARN. The exit status is tested on the assignment itself so it
+# cannot be clobbered by a statement placed between the call and the check.
+if ! ALB_ARN=$(aws elbv2 describe-load-balancers \
+  --names "$ALB_NAME" \
+  --region "$REGION" \
+  --query 'LoadBalancers[0].LoadBalancerArn' \
+  --output text 2>/dev/null) || [ -z "$ALB_ARN" ] || [ "$ALB_ARN" = "None" ]; then
+  log warn "⚠️ ALB metrics: could not find ALB [$ALB_NAME]"
+  return 0
+fi
+
+# Count rules across all listeners
+TOTAL_RULES=0
+if ! LISTENERS=$(aws elbv2 describe-listeners \
+  --load-balancer-arn "$ALB_ARN" \
+  --region "$REGION" \
+  --output json 2>/dev/null); then
+  log warn "⚠️ ALB metrics: could not retrieve listeners"
+  return 0
+fi
+
+LISTENER_ARNS=$(echo "$LISTENERS" | jq -r '.Listeners[]?.ListenerArn // empty' 2>/dev/null)
+
+# ARNs never contain whitespace, so splitting the list on words is safe here.
+for listener_arn in $LISTENER_ARNS; do
+  if ! RULES=$(aws elbv2 describe-rules \
+    --listener-arn "$listener_arn" \
+    --region "$REGION" \
+    --output json 2>/dev/null); then
+    log warn "⚠️ ALB metrics: could not retrieve rules for listener [$listener_arn]"
+    continue
+  fi
+
+  # Default rules are excluded from the count.
+  LISTENER_RULE_COUNT=$(echo "$RULES" | jq '[.Rules[]? | select(.IsDefault != true)] | length' 2>/dev/null)
+  case "$LISTENER_RULE_COUNT" in
+    '' | *[!0-9]*)
+      # An empty or non-numeric count would break the arithmetic below.
+      log warn "⚠️ ALB metrics: unexpected describe-rules output for listener [$listener_arn]"
+      continue
+      ;;
+  esac
+  TOTAL_RULES=$((TOTAL_RULES + LISTENER_RULE_COUNT))
+done
+
+# Count target groups. TG_COUNT is only assigned after the aws exit status has
+# been tested; assigning it between the call and an 'if [ $? -eq 0 ]' check
+# would make the check test the assignment (always 0) instead of the aws call.
+if TARGET_GROUPS=$(aws elbv2 describe-target-groups \
+  --load-balancer-arn "$ALB_ARN" \
+  --region "$REGION" \
+  --output json 2>/dev/null); then
+  TG_COUNT=$(echo "$TARGET_GROUPS" | jq '.TargetGroups | length' 2>/dev/null)
+else
+  log warn "⚠️ ALB metrics: could not retrieve target groups, reporting 0"
+  TG_COUNT=0
+fi
+
+# Fall back to 0 when jq produced nothing usable, so the payloads stay valid.
+case "$TG_COUNT" in
+  '' | *[!0-9]*) TG_COUNT=0 ;;
+esac
+
+# Publish metrics. Payloads are built with jq so values are JSON-escaped.
+case "$ALB_METRICS_PUBLISH_TARGET" in
+  cloudwatch)
+    METRIC_DATA=$(jq -cn \
+      --arg alb "$ALB_NAME" \
+      --argjson rules "$TOTAL_RULES" \
+      --argjson target_groups "$TG_COUNT" \
+      '[
+        {MetricName: "RuleCount", Value: $rules, Unit: "Count", Dimensions: [{Name: "ALBName", Value: $alb}]},
+        {MetricName: "TargetGroupCount", Value: $target_groups, Unit: "Count", Dimensions: [{Name: "ALBName", Value: $alb}]}
+      ]')
+
+    if aws cloudwatch put-metric-data \
+      --namespace "nullplatform/ApplicationELB" \
+      --metric-data "$METRIC_DATA" \
+      --region "$REGION" 2>/dev/null; then
+      log info "✅ ALB metrics published to CloudWatch (rules: $TOTAL_RULES, target_groups: $TG_COUNT)"
+    else
+      log error "❌ ALB metrics: failed to publish to CloudWatch"
+    fi
+    ;;
+
+  datadog)
+    if [ -z "$DATADOG_API_KEY" ]; then
+      log warn "⚠️ ALB metrics: DATADOG_API_KEY not set"
+      return 0
+    fi
+
+    DATADOG_SITE="${DATADOG_SITE:-datadoghq.com}"
+    TIMESTAMP=$(date +%s)
+
+    # type 3 is "gauge" in the Datadog v2 series API: both values are
+    # point-in-time totals, not increments to be summed per interval.
+    DATADOG_PAYLOAD=$(jq -cn \
+      --arg alb "$ALB_NAME" \
+      --arg region "$REGION" \
+      --argjson ts "$TIMESTAMP" \
+      --argjson rules "$TOTAL_RULES" \
+      --argjson target_groups "$TG_COUNT" \
+      '{series: [
+        {
+          metric: "nullplatform.applicationelb.rule_count",
+          type: 3,
+          points: [{timestamp: $ts, value: $rules}],
+          tags: ["alb_name:\($alb)", "region:\($region)"]
+        },
+        {
+          metric: "nullplatform.applicationelb.target_group_count",
+          type: 3,
+          points: [{timestamp: $ts, value: $target_groups}],
+          tags: ["alb_name:\($alb)", "region:\($region)"]
+        }
+      ]}')
+
+    # --max-time keeps an unreachable Datadog endpoint from stalling the deploy.
+    RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
+      -X POST "https://api.${DATADOG_SITE}/api/v2/series" \
+      -H "DD-API-KEY: $DATADOG_API_KEY" \
+      -H "Content-Type: application/json" \
+      -d "$DATADOG_PAYLOAD" 2>/dev/null)
+
+    if [ "$RESPONSE" = "202" ]; then
+      log info "✅ ALB metrics published to Datadog (rules: $TOTAL_RULES, target_groups: $TG_COUNT)"
+    else
+      log error "❌ ALB metrics: failed to publish to Datadog (HTTP $RESPONSE)"
+    fi
+    ;;
+
+  *)
+    log warn "⚠️ ALB metrics: unknown target '$ALB_METRICS_PUBLISH_TARGET'"
+    ;;
+esac
+
+return 0
diff --git a/k8s/deployment/tests/publish_alb_metrics.bats b/k8s/deployment/tests/publish_alb_metrics.bats
new file mode 100644
index 00000000..48f85d03
--- /dev/null
+++ b/k8s/deployment/tests/publish_alb_metrics.bats
@@ -0,0 +1,309 @@
+#!/usr/bin/env bats
+
+setup() {
+  PROJECT_ROOT="$(cd "$(dirname "$BATS_TEST_FILENAME")/../../.." && pwd)"
+  source "$PROJECT_ROOT/testing/assertions.sh"
+
+  export SCRIPT="$PROJECT_ROOT/k8s/deployment/publish_alb_metrics"
+
+  # Default context
+  export CONTEXT='{"alb_name":"k8s-nullplatform-internet-facing","region":"us-east-1"}'
+
+  # Default config
+  export ALB_METRICS_PUBLISH_ENABLED="true"
+  export ALB_METRICS_PUBLISH_TARGET="cloudwatch"
+
+  # Track calls
+  export AWS_CALLS_LOG="$BATS_TEST_TMPDIR/aws_calls.log"
+  export CURL_CALLS_LOG="$BATS_TEST_TMPDIR/curl_calls.log"
+
+  # Mock aws CLI
+  aws() {
+    echo "$*" >> "$AWS_CALLS_LOG"
+    case "$*" in
+      *"describe-load-balancers"*)
+        echo "arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/k8s-nullplatform-internet-facing/abc123"
+        ;;
+      *"describe-listeners"*)
+        echo '{"Listeners":[{"ListenerArn":"arn:aws:elasticloadbalancing:us-east-1:123456789:listener/app/abc/123"}]}'
+        ;;
+      *"describe-rules"*)
+        echo '{"Rules":[{"IsDefault":true},{"IsDefault":false},{"IsDefault":false},{"IsDefault":false}]}'
+        ;;
+      *"describe-target-groups"*)
+        echo '{"TargetGroups":[{},{},{},{},{}]}'
+        ;;
+      *"put-metric-data"*)
+        return 0
+        ;;
+    esac
+  }
+  export -f aws
+
+  # Mock curl
+  curl() {
+    echo "$*" >> "$CURL_CALLS_LOG"
+    echo "202"
+  }
+  export -f curl
+
+  # Source real get_config_value (uses CONTEXT + env vars already set)
+  source "$PROJECT_ROOT/k8s/utils/get_config_value"
+  export -f get_config_value
+
+  # Mock log function (from k8s/logging)
+  log() {
+    local level="${1:-info}"
+    local message="${2:-}"
+    echo "$message"
+  }
+  export -f log
+}
+
+run_script() {
+  run bash -c 'source "$SCRIPT"'
+}
+
+# =============================================================================
+# Disabled / skipped scenarios
+# =============================================================================
+
+@test "skips silently when ALB_METRICS_PUBLISH_ENABLED is false" {
+  export ALB_METRICS_PUBLISH_ENABLED="false"
+  run_script
+  assert_equal "$status" "0"
+  assert_equal "$output" ""
+}
+
+@test "skips silently when ALB_METRICS_PUBLISH_ENABLED is not set" {
+  unset ALB_METRICS_PUBLISH_ENABLED
+  run_script
+  assert_equal "$status" "0"
+  assert_equal "$output" ""
+}
+
+# =============================================================================
+# Error scenarios
+# =============================================================================
+
+@test "warns when ALB name not found in context" {
+  export CONTEXT='{"region":"us-east-1"}'
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: ALB name not found in context"
+}
+
+@test "warns when ALB name is null in context" {
+  export CONTEXT='{"alb_name":null,"region":"us-east-1"}'
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: ALB name not found in context"
+}
+
+@test "warns when region not found in context" {
+  export CONTEXT='{"alb_name":"k8s-nullplatform-internet-facing"}'
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: region not found in context"
+}
+
+@test "warns when ALB not found in AWS" {
+  aws() {
+    case "$*" in
+      *"describe-load-balancers"*) echo "None" ;;
+    esac
+  }
+  export -f aws
+
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: could not find ALB"
+}
+
+@test "warns when describe-load-balancers fails" {
+  aws() {
+    case "$*" in
+      *"describe-load-balancers"*) return 1 ;;
+    esac
+  }
+  export -f aws
+
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: could not find ALB"
+}
+
+@test "warns when describe-listeners fails" {
+  aws() {
+    case "$*" in
+      *"describe-load-balancers"*) echo "arn:aws:elasticloadbalancing:us-east-1:123:lb/abc" ;;
+      *"describe-listeners"*) return 1 ;;
+    esac
+  }
+  export -f aws
+
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: could not retrieve listeners"
+}
+
+# =============================================================================
+# CloudWatch success
+# =============================================================================
+
+@test "publishes to CloudWatch with correct rule and target group counts" {
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics published to CloudWatch (rules: 3, target_groups: 5)"
+}
+
+@test "CloudWatch put-metric-data uses correct namespace and dimensions" {
+  run_script
+  local calls=$(cat "$AWS_CALLS_LOG")
+  assert_contains "$calls" "nullplatform/ApplicationELB"
+  assert_contains "$calls" "k8s-nullplatform-internet-facing"
+  assert_contains "$calls" "RuleCount"
+  assert_contains "$calls" "TargetGroupCount"
+}
+
+@test "warns when CloudWatch put-metric-data fails" {
+  aws() {
+    echo "$*" >> "$AWS_CALLS_LOG"
+    case "$*" in
+      *"describe-load-balancers"*) echo "arn:aws:elasticloadbalancing:us-east-1:123:lb/abc" ;;
+      *"describe-listeners"*) echo '{"Listeners":[{"ListenerArn":"arn:listener/123"}]}' ;;
+      *"describe-rules"*) echo '{"Rules":[{"IsDefault":true}]}' ;;
+      *"describe-target-groups"*) echo '{"TargetGroups":[]}' ;;
+      *"put-metric-data"*) return 1 ;;
+    esac
+  }
+  export -f aws
+
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: failed to publish to CloudWatch"
+}
+
+# =============================================================================
+# Datadog success
+# =============================================================================
+
+@test "publishes to Datadog with correct counts" {
+  export ALB_METRICS_PUBLISH_TARGET="datadog"
+  export DATADOG_API_KEY="test-api-key"
+  export DATADOG_SITE="datadoghq.com"
+
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics published to Datadog (rules: 3, target_groups: 5)"
+}
+
+@test "Datadog request uses correct endpoint and metric names" {
+  export ALB_METRICS_PUBLISH_TARGET="datadog"
+  export DATADOG_API_KEY="test-api-key"
+  export DATADOG_SITE="datadoghq.eu"
+
+  run_script
+  local calls=$(cat "$CURL_CALLS_LOG")
+  assert_contains "$calls" "https://api.datadoghq.eu/api/v2/series"
+  assert_contains "$calls" "nullplatform.applicationelb.rule_count"
+  assert_contains "$calls" "nullplatform.applicationelb.target_group_count"
+  assert_contains "$calls" "alb_name:k8s-nullplatform-internet-facing"
+}
+
+@test "warns when DATADOG_API_KEY not set" {
+  export ALB_METRICS_PUBLISH_TARGET="datadog"
+  unset DATADOG_API_KEY
+
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: DATADOG_API_KEY not set"
+}
+
+@test "warns when Datadog returns non-202" {
+  export ALB_METRICS_PUBLISH_TARGET="datadog"
+  export DATADOG_API_KEY="test-api-key"
+
+  curl() {
+    echo "403"
+  }
+  export -f curl
+
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: failed to publish to Datadog (HTTP 403)"
+}
+
+# =============================================================================
+# Unknown target
+# =============================================================================
+
+@test "warns on unknown metrics target" {
+  export ALB_METRICS_PUBLISH_TARGET="prometheus"
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: unknown target 'prometheus'"
+}
+
+# =============================================================================
+# Rule counting logic
+# =============================================================================
+
+@test "excludes default rules from count" {
+  aws() {
+    echo "$*" >> "$AWS_CALLS_LOG"
+    case "$*" in
+      *"describe-load-balancers"*) echo "arn:aws:elasticloadbalancing:us-east-1:123:lb/abc" ;;
+      *"describe-listeners"*) echo '{"Listeners":[{"ListenerArn":"arn:listener/123"}]}' ;;
+      *"describe-rules"*) echo '{"Rules":[{"IsDefault":true},{"IsDefault":false}]}' ;;
+      *"describe-target-groups"*) echo '{"TargetGroups":[{}]}' ;;
+      *"put-metric-data"*) return 0 ;;
+    esac
+  }
+  export -f aws
+
+  run_script
+  assert_contains "$output" "rules: 1, target_groups: 1"
+}
+
+@test "counts rules across multiple listeners" {
+  aws() {
+    echo "$*" >> "$AWS_CALLS_LOG"
+    case "$*" in
+      *"describe-load-balancers"*) echo "arn:aws:elasticloadbalancing:us-east-1:123:lb/abc" ;;
+      *"describe-listeners"*) echo '{"Listeners":[{"ListenerArn":"arn:listener/1"},{"ListenerArn":"arn:listener/2"}]}' ;;
+      *"describe-rules"*"listener/1"*) echo '{"Rules":[{"IsDefault":true},{"IsDefault":false},{"IsDefault":false}]}' ;;
+      *"describe-rules"*"listener/2"*) echo '{"Rules":[{"IsDefault":true},{"IsDefault":false}]}' ;;
+      *"describe-rules"*) echo '{"Rules":[{"IsDefault":true},{"IsDefault":false},{"IsDefault":false}]}' ;;
+      *"describe-target-groups"*) echo '{"TargetGroups":[{},{}]}' ;;
+      *"put-metric-data"*) return 0 ;;
+    esac
+  }
+  export -f aws
+
+  run_script
+  assert_contains "$output" "rules: 3, target_groups: 2"
+}
+
+# =============================================================================
+# Target group counting
+# =============================================================================
+
+@test "reports zero target groups and warns when describe-target-groups fails" {
+  aws() {
+    echo "$*" >> "$AWS_CALLS_LOG"
+    case "$*" in
+      *"describe-load-balancers"*) echo "arn:aws:elasticloadbalancing:us-east-1:123:lb/abc" ;;
+      *"describe-listeners"*) echo '{"Listeners":[{"ListenerArn":"arn:listener/123"}]}' ;;
+      *"describe-rules"*) echo '{"Rules":[{"IsDefault":true},{"IsDefault":false}]}' ;;
+      *"describe-target-groups"*) return 1 ;;
+      *"put-metric-data"*) return 0 ;;
+    esac
+  }
+  export -f aws
+
+  run_script
+  assert_equal "$status" "0"
+  assert_contains "$output" "ALB metrics: could not retrieve target groups"
+  assert_contains "$output" "rules: 1, target_groups: 0"
+}
diff --git a/k8s/deployment/workflows/finalize.yaml b/k8s/deployment/workflows/finalize.yaml index 3974b329..c0b827c9 100644 --- a/k8s/deployment/workflows/finalize.yaml +++ b/k8s/deployment/workflows/finalize.yaml @@ -54,11 +54,17 @@ steps: type: file file: "$OUTPUT_DIR/ingress-$SCOPE_ID-$DEPLOYMENT_ID.yaml" post: - name: verify_networking_reconciliation - type: script - file: "$SERVICE_PATH/deployment/verify_networking_reconciliation" - configuration: - VERIFY_WEIGHTS: false + name: post_apply_checks + type: workflow + steps: + - name: verify_networking_reconciliation + type: script + file: "$SERVICE_PATH/deployment/verify_networking_reconciliation" + configuration: + VERIFY_WEIGHTS: false + - name: publish_alb_metrics + type: script + file: "$SERVICE_PATH/deployment/publish_alb_metrics" - name: build deployment type: script file: "$SERVICE_PATH/deployment/build_blue_deployment" diff --git a/k8s/deployment/workflows/initial.yaml b/k8s/deployment/workflows/initial.yaml index 22032272..b7bc8134 100644 --- a/k8s/deployment/workflows/initial.yaml +++ b/k8s/deployment/workflows/initial.yaml @@ -74,6 +74,9 @@ steps: file: "$SERVICE_PATH/deployment/verify_networking_reconciliation" configuration: VERIFY_WEIGHTS: false + - name: publish_alb_metrics + type: script + file: "$SERVICE_PATH/deployment/publish_alb_metrics" - name: wait deployment active type: script file: 
"$SERVICE_PATH/deployment/wait_deployment_active" diff --git a/k8s/deployment/workflows/switch_traffic.yaml b/k8s/deployment/workflows/switch_traffic.yaml index 7e8054ab..54f90bdf 100644 --- a/k8s/deployment/workflows/switch_traffic.yaml +++ b/k8s/deployment/workflows/switch_traffic.yaml @@ -57,8 +57,14 @@ steps: ACTION: apply DRY_RUN: false post: - name: verify_networking_reconciliation - type: script - file: "$SERVICE_PATH/deployment/verify_networking_reconciliation" - configuration: - VERIFY_WEIGHTS: true + name: post_apply_checks + type: workflow + steps: + - name: verify_networking_reconciliation + type: script + file: "$SERVICE_PATH/deployment/verify_networking_reconciliation" + configuration: + VERIFY_WEIGHTS: true + - name: publish_alb_metrics + type: script + file: "$SERVICE_PATH/deployment/publish_alb_metrics" diff --git a/k8s/values.yaml b/k8s/values.yaml index a07e8db8..020b6059 100644 --- a/k8s/values.yaml +++ b/k8s/values.yaml @@ -12,6 +12,8 @@ configuration: ALB_RECONCILIATION_ENABLED: false ALB_MAX_CAPACITY: 75 ALB_MAX_TARGET_GROUPS: 98 + ALB_METRICS_PUBLISH_ENABLED: false +# ALB_METRICS_PUBLISH_TARGET: cloudwatch # Available values: cloudwatch | datadog DEPLOYMENT_MAX_WAIT_IN_SECONDS: 600 DEPLOYMENT_TEMPLATE: "$SERVICE_PATH/deployment/templates/deployment.yaml.tpl" SECRET_TEMPLATE: "$SERVICE_PATH/deployment/templates/secret.yaml.tpl"