diff --git a/CHANGELOG.md b/CHANGELOG.md index b2b2ccec..fbf31456 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add support for multiple ALBs - Add configurable memory and cpu limit for traffic manager - Add ALB metrics publishing to CloudWatch or Datadog (rule count and target group count per ALB) +- Fix blue-green switch-traffic failure when `additional_ports` (e.g., gRPC) are added to a scope after the initial deployment ## [1.10.1] - 2026-02-13 - Hotfix on wait_deployment_iteration diff --git a/k8s/deployment/build_context b/k8s/deployment/build_context index 37d9a763..0808681b 100755 --- a/k8s/deployment/build_context +++ b/k8s/deployment/build_context @@ -245,6 +245,27 @@ if [[ -n "$TRAFFIC_MANAGER_CONFIG_MAP" ]]; then log info "✨ ConfigMap '$TRAFFIC_MANAGER_CONFIG_MAP' validation successful" fi +# Check if blue deployment has K8s services for additional ports +BLUE_ADDITIONAL_PORT_SERVICES="{}" +if [ -n "$BLUE_DEPLOYMENT_ID" ] && [ "$BLUE_DEPLOYMENT_ID" != "null" ]; then + ADDITIONAL_PORTS=$(echo "$CONTEXT" | jq -c '.scope.capabilities.additional_ports // []') + if [ "$ADDITIONAL_PORTS" != "[]" ] && [ "$ADDITIONAL_PORTS" != "null" ]; then + while IFS= read -r port_config; do + port=$(echo "$port_config" | jq -r '.port') + type_raw=$(echo "$port_config" | jq -r '.type') + type_lower=$(echo "$type_raw" | tr '[:upper:]' '[:lower:]') + service_name="d-${SCOPE_ID}-${BLUE_DEPLOYMENT_ID}-${type_lower}-${port}" + key="${type_lower}-${port}" + if kubectl get service "$service_name" -n "$K8S_NAMESPACE" &>/dev/null; then + BLUE_ADDITIONAL_PORT_SERVICES=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq --arg key "$key" '. + {($key): true}') + else + BLUE_ADDITIONAL_PORT_SERVICES=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq --arg key "$key" '. + {($key): false}') + log info "Blue deployment service '$service_name' not found - additional port traffic will route to new deployment only" + fi + done < <(echo "$ADDITIONAL_PORTS" | jq -c '.[]') + fi +fi + CONTEXT=$(echo "$CONTEXT" | jq \ --arg blue_deployment_id "$BLUE_DEPLOYMENT_ID" \ --arg blue_replicas "$BLUE_REPLICAS" \ @@ -258,6 +279,7 @@ CONTEXT=$(echo "$CONTEXT" | jq \ --arg traffic_manager_config_map "$TRAFFIC_MANAGER_CONFIG_MAP" \ --arg container_memory_in_memory "$CONTAINER_MEMORY_IN_MEMORY" \ --arg container_cpu_in_millicores "$CONTAINER_CPU_IN_MILLICORES" \ + --argjson blue_additional_port_services "$BLUE_ADDITIONAL_PORT_SERVICES" \ '. + {blue_deployment_id: $blue_deployment_id, blue_replicas: $blue_replicas, green_replicas: $green_replicas, @@ -269,7 +291,8 @@ CONTEXT=$(echo "$CONTEXT" | jq \ service_account_name: $service_account_name, traffic_manager_config_map: $traffic_manager_config_map, container_memory_in_memory: $container_memory_in_memory, - container_cpu_in_millicores: $container_cpu_in_millicores + container_cpu_in_millicores: $container_cpu_in_millicores, + blue_additional_port_services: $blue_additional_port_services }') DEPLOYMENT_ID=$(echo "$CONTEXT" | jq -r '.deployment.id') diff --git a/k8s/deployment/templates/blue-green-ingress.yaml.tpl b/k8s/deployment/templates/blue-green-ingress.yaml.tpl index f33ca37d..20a0a5b0 100644 --- a/k8s/deployment/templates/blue-green-ingress.yaml.tpl +++ b/k8s/deployment/templates/blue-green-ingress.yaml.tpl @@ -114,11 +114,30 @@ metadata: {{- end }} {{- end }} annotations: +{{- $port_key := "" -}} +{{- if eq .type "HTTP" -}} + {{- $port_key = printf "http-%v" .port -}} +{{- else -}} + {{- $port_key = printf "grpc-%v" .port -}} +{{- end -}} +{{- $blue_svc_exists := true -}} +{{- if $.blue_additional_port_services -}} + {{- if not (index $.blue_additional_port_services $port_key) -}} + {{- $blue_svc_exists = false -}} + {{- end -}} +{{- end -}} +{{- if $blue_svc_exists }} alb.ingress.kubernetes.io/actions.bg-deployment-{{ if eq .type "HTTP" }}http{{ else }}grpc{{ end }}-{{ .port }}: >- {"type":"forward","forwardConfig":{"targetGroups":[ {"serviceName":"d-{{ $.scope.id }}-{{ $.blue_deployment_id }}-{{ if eq .type "HTTP" }}http{{ else }}grpc{{ end }}-{{ .port }}","servicePort":{{ .port }},"weight":{{ sub 100 $.deployment.strategy_data.desired_switched_traffic }}}, {"serviceName":"d-{{ $.scope.id }}-{{ $.deployment.id }}-{{ if eq .type "HTTP" }}http{{ else }}grpc{{ end }}-{{ .port }}","servicePort":{{ .port }},"weight":{{ $.deployment.strategy_data.desired_switched_traffic }}} ]}} +{{- else }} + alb.ingress.kubernetes.io/actions.bg-deployment-{{ if eq .type "HTTP" }}http{{ else }}grpc{{ end }}-{{ .port }}: >- + {"type":"forward","forwardConfig":{"targetGroups":[ + {"serviceName":"d-{{ $.scope.id }}-{{ $.deployment.id }}-{{ if eq .type "HTTP" }}http{{ else }}grpc{{ end }}-{{ .port }}","servicePort":{{ .port }},"weight":100} + ]}} +{{- end }} alb.ingress.kubernetes.io/actions.response-404: '{"type":"fixed-response","fixedResponseConfig":{"contentType":"text/plain","statusCode":"404","messageBody":"404 scope not found or has not been deployed yet"}}' alb.ingress.kubernetes.io/group.name: {{ $.alb_name }} alb.ingress.kubernetes.io/load-balancer-name: {{ $.alb_name }} diff --git a/k8s/deployment/tests/build_context.bats b/k8s/deployment/tests/build_context.bats index 65351bdc..ce8aa579 100644 --- a/k8s/deployment/tests/build_context.bats +++ b/k8s/deployment/tests/build_context.bats @@ -696,3 +696,107 @@ SCRIPT assert_equal "$CONTAINER_CPU_IN_MILLICORES" "93" } + +# ============================================================================= +# Blue Additional Port Services Detection Tests +# ============================================================================= +@test "blue additional port services: empty map when no BLUE_DEPLOYMENT_ID" { + BLUE_DEPLOYMENT_ID="" + BLUE_ADDITIONAL_PORT_SERVICES="{}" + if [ -n "$BLUE_DEPLOYMENT_ID" ] && [ "$BLUE_DEPLOYMENT_ID" != "null" ]; then + BLUE_ADDITIONAL_PORT_SERVICES='{"grpc-9014": true}' + fi + assert_equal "$BLUE_ADDITIONAL_PORT_SERVICES" "{}" +} + +@test "blue additional port services: empty map when BLUE_DEPLOYMENT_ID is null" { + BLUE_DEPLOYMENT_ID="null" + BLUE_ADDITIONAL_PORT_SERVICES="{}" + if [ -n "$BLUE_DEPLOYMENT_ID" ] && [ "$BLUE_DEPLOYMENT_ID" != "null" ]; then + BLUE_ADDITIONAL_PORT_SERVICES='{"grpc-9014": true}' + fi + assert_equal "$BLUE_ADDITIONAL_PORT_SERVICES" "{}" +} + +@test "blue additional port services: empty map when no additional_ports in capabilities" { + BLUE_DEPLOYMENT_ID="deploy-old-456" + export CONTEXT='{"scope":{"capabilities":{}}}' + ADDITIONAL_PORTS=$(echo "$CONTEXT" | jq -c '.scope.capabilities.additional_ports // []') + assert_equal "$ADDITIONAL_PORTS" "[]" +} + +@test "blue additional port services: detects existing service via kubectl" { + kubectl() { + if [[ "$1" == "get" && "$2" == "service" && "$3" == "d-scope-456-deploy-old-789-grpc-9014" ]]; then + return 0 + fi + return 1 + } + export -f kubectl + + SCOPE_ID="scope-456" + BLUE_DEPLOYMENT_ID="deploy-old-789" + K8S_NAMESPACE="test-ns" + service_name="d-${SCOPE_ID}-${BLUE_DEPLOYMENT_ID}-grpc-9014" + + if kubectl get service "$service_name" -n "$K8S_NAMESPACE" &>/dev/null; then + result="true" + else + result="false" + fi + + assert_equal "$result" "true" +} + +@test "blue additional port services: detects missing service via kubectl" { + kubectl() { return 1; } + export -f kubectl + + SCOPE_ID="scope-456" + BLUE_DEPLOYMENT_ID="deploy-old-789" + K8S_NAMESPACE="test-ns" + service_name="d-${SCOPE_ID}-${BLUE_DEPLOYMENT_ID}-grpc-9014" + + if kubectl get service "$service_name" -n "$K8S_NAMESPACE" &>/dev/null; then + result="true" + else + result="false" + fi + + assert_equal "$result" "false" +} + +@test "blue additional port services: builds correct map for mixed existing/missing ports" { + kubectl() { + if [[ "$3" == "d-scope-456-deploy-old-789-grpc-9014" ]]; then + return 0 # exists + fi + return 1 # doesn't exist + } + export -f kubectl + + SCOPE_ID="scope-456" + BLUE_DEPLOYMENT_ID="deploy-old-789" + K8S_NAMESPACE="test-ns" + BLUE_ADDITIONAL_PORT_SERVICES="{}" + + ADDITIONAL_PORTS='[{"port":9014,"type":"GRPC"},{"port":8081,"type":"HTTP"}]' + while IFS= read -r port_config; do + port=$(echo "$port_config" | jq -r '.port') + type_raw=$(echo "$port_config" | jq -r '.type') + type_lower=$(echo "$type_raw" | tr '[:upper:]' '[:lower:]') + service_name="d-${SCOPE_ID}-${BLUE_DEPLOYMENT_ID}-${type_lower}-${port}" + key="${type_lower}-${port}" + if kubectl get service "$service_name" -n "$K8S_NAMESPACE" &>/dev/null; then + BLUE_ADDITIONAL_PORT_SERVICES=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq --arg key "$key" '. + {($key): true}') + else + BLUE_ADDITIONAL_PORT_SERVICES=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq --arg key "$key" '. + {($key): false}') + fi + done < <(echo "$ADDITIONAL_PORTS" | jq -c '.[]') + + grpc_exists=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq -r '.["grpc-9014"]') + http_exists=$(echo "$BLUE_ADDITIONAL_PORT_SERVICES" | jq -r '.["http-8081"]') + + assert_equal "$grpc_exists" "true" + assert_equal "$http_exists" "false" +} diff --git a/k8s/deployment/tests/verify_ingress_reconciliation.bats b/k8s/deployment/tests/verify_ingress_reconciliation.bats index 717fe16c..1e216f96 100644 --- a/k8s/deployment/tests/verify_ingress_reconciliation.bats +++ b/k8s/deployment/tests/verify_ingress_reconciliation.bats @@ -223,7 +223,7 @@ teardown() { echo 'arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/test-alb/abc123' ;; describe-listeners) - echo '{\"Listeners\":[{\"ListenerArn\":\"arn:aws:listener/123\"}]}' + echo '{\"Listeners\":[{\"ListenerArn\":\"arn:aws:listener/123\",\"Port\":443}]}' ;; describe-rules) echo '{\"Rules\":[{\"Conditions\":[{\"Field\":\"host-header\",\"Values\":[\"app.example.com\"]}],\"Actions\":[{\"Type\":\"forward\",\"ForwardConfig\":{\"TargetGroups\":[{\"Weight\":80},{\"Weight\":20}]}}]}]}' @@ -244,7 +244,51 @@ teardown() { assert_contains "$output" "📋 ALB validation enabled: k8s-test-alb for domain app.example.com" assert_contains "$output" "📝 Checking domain: app.example.com" assert_contains "$output" "✅ Found rule for domain: app.example.com" - assert_contains "$output" "❌ Weights mismatch: expected=" + assert_contains "$output" "❌ Weights mismatch on listener port 443: expected=50/50 actual=20/80" +} + +@test "verify_ingress_reconciliation: skips weight check on additional port listener when blue has no service" { + # Scenario: gRPC (port 50051) was added to scope AFTER the blue deployment was created. + # The blue deployment has no K8s service for gRPC, so the ingress routes 100% to green. + # The verify script should skip weight verification on the gRPC listener and check the + # primary HTTP listener (port 443) instead. + local ctx='{"scope":{"slug":"my-app","domain":"app.example.com","current_active_deployment":"deploy-old","capabilities":{"additional_ports":[{"port":50051,"type":"GRPC"}]}},"alb_name":"k8s-test-alb","blue_additional_port_services":{"grpc-50051":false},"deployment":{"strategy":"blue_green","strategy_data":{"desired_switched_traffic":10}}}' + + run bash -c " + kubectl() { + echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}' + return 0 + } + aws() { + case \"\$2\" in + describe-load-balancers) + echo 'arn:aws:elasticloadbalancing:us-east-1:123456789:loadbalancer/app/test-alb/abc123' + ;; + describe-listeners) + echo '{\"Listeners\":[{\"ListenerArn\":\"arn:aws:listener/grpc\",\"Port\":50051},{\"ListenerArn\":\"arn:aws:listener/https\",\"Port\":443}]}' + ;; + describe-rules) + if [[ \"\$4\" == *\"grpc\"* ]]; then + echo '{\"Rules\":[{\"Conditions\":[{\"Field\":\"host-header\",\"Values\":[\"app.example.com\"]}],\"Actions\":[{\"Type\":\"forward\",\"ForwardConfig\":{\"TargetGroups\":[{\"Weight\":100}]}}]}]}' + else + echo '{\"Rules\":[{\"Conditions\":[{\"Field\":\"host-header\",\"Values\":[\"app.example.com\"]}],\"Actions\":[{\"Type\":\"forward\",\"ForwardConfig\":{\"TargetGroups\":[{\"Weight\":90},{\"Weight\":10}]}}]}]}' + fi + ;; + esac + return 0 + } + export -f kubectl aws + export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY' + export MAX_WAIT_SECONDS='1' CHECK_INTERVAL='1' + export ALB_RECONCILIATION_ENABLED='true' VERIFY_WEIGHTS='true' REGION='$REGION' + export CONTEXT='$ctx' + source '$BATS_TEST_DIRNAME/../verify_ingress_reconciliation' + " + + [ "$status" -eq 0 ] + assert_contains "$output" "Skipping weight check on listener port 50051" + assert_contains "$output" "✅ Weights match on listener port 443" + assert_contains "$output" "✅ ALB configuration validated successfully" } @test "verify_ingress_reconciliation: detects domain not found in ALB rules" { diff --git a/k8s/deployment/verify_ingress_reconciliation b/k8s/deployment/verify_ingress_reconciliation index e64c465a..ee9f3221 100644 --- a/k8s/deployment/verify_ingress_reconciliation +++ b/k8s/deployment/verify_ingress_reconciliation @@ -75,6 +75,30 @@ validate_alb_config() { return 1 fi + # Build a set of additional port numbers where the blue deployment has no K8s service. + # When additional_ports are added to a scope after the initial deployment, the blue + # deployment won't have services for those ports. ALB listeners on those ports will + # have single-target weights (100% green) instead of the standard blue-green split. + local _blue_missing_ports="" + local _additional_ports + _additional_ports=$(echo "$CONTEXT" | jq -c '.scope.capabilities.additional_ports // []') + local _blue_port_svc + _blue_port_svc=$(echo "$CONTEXT" | jq -c '.blue_additional_port_services // {}') + + if [ "$_additional_ports" != "[]" ] && [ "$_additional_ports" != "null" ] && [ "$_blue_port_svc" != "{}" ]; then + while IFS= read -r _pc; do + local _port _type _key _exists + _port=$(echo "$_pc" | jq -r '.port') + _type=$(echo "$_pc" | jq -r '.type' | tr '[:upper:]' '[:lower:]') + _key="${_type}-${_port}" + _exists=$(echo "$_blue_port_svc" | jq -r --arg k "$_key" 'if has($k) then .[$k] else true end') + if [ "$_exists" = "false" ]; then + _blue_missing_ports="${_blue_missing_ports} ${_port}" + log debug "📝 Blue deployment has no service for additional port ${_port} - expecting single-target weights" + fi + done < <(echo "$_additional_ports" | jq -c '.[]') + fi + local all_domains_found=true for domain in "${ALL_DOMAINS[@]}"; do @@ -102,9 +126,32 @@ validate_alb_config() { ') if [ -n "$MATCHING_RULE" ]; then - log info " ✅ Found rule for domain: $domain" - if [ "${VERIFY_WEIGHTS:-false}" = "true" ]; then + # Determine the listener port to check if this is an additional port + # where the blue deployment has no service (added after initial deploy) + local LISTENER_PORT + LISTENER_PORT=$(echo "$LISTENERS" | jq -r --arg arn "$listener_arn" \ + '.Listeners[] | select(.ListenerArn == $arn) | .Port') + + local is_blue_missing_port=false + for _mp in $_blue_missing_ports; do + if [ "$LISTENER_PORT" = "$_mp" ]; then + is_blue_missing_port=true + break + fi + done + + if [ "$is_blue_missing_port" = "true" ]; then + # Blue deployment was created before this additional port was added + # to the scope config, so there's no blue K8s service for it. + # Skip weight verification on this listener — the ingress correctly + # routes 100% to green. Verify weights on the primary listener instead. + log debug " ⏭️ Skipping weight check on listener port $LISTENER_PORT (blue has no service for this port)" + continue + fi + + log info " ✅ Found rule for domain: $domain" + BLUE_WEIGHT=$((100 - SWITCH_TRAFFIC)) GREEN_WEIGHT=$SWITCH_TRAFFIC @@ -124,10 +171,13 @@ validate_alb_config() { if [ -n "$EXPECTED_WEIGHTS" ] && [ -n "$ACTUAL_WEIGHTS" ]; then if [ "$EXPECTED_WEIGHTS" == "$ACTUAL_WEIGHTS" ]; then - log info " ✅ Weights match (GREEN: $GREEN_WEIGHT, BLUE: $BLUE_WEIGHT)" + log info " ✅ Weights match on listener port $LISTENER_PORT (GREEN: $GREEN_WEIGHT, BLUE: $BLUE_WEIGHT)" domain_found=true else - log error " ❌ Weights mismatch: expected=$EXPECTED_WEIGHTS actual=$ACTUAL_WEIGHTS" + local _exp_fmt _act_fmt + _exp_fmt=$(echo "$EXPECTED_WEIGHTS" | tr '\n' '/' | sed 's/\/$//') + _act_fmt=$(echo "$ACTUAL_WEIGHTS" | tr '\n' '/' | sed 's/\/$//') + log error " ❌ Weights mismatch on listener port $LISTENER_PORT: expected=$_exp_fmt actual=$_act_fmt" domain_found=false fi else @@ -135,6 +185,7 @@ validate_alb_config() { domain_found=false fi else + log info " ✅ Found rule for domain: $domain" domain_found=true fi break