Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions k8s/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ Configuration specific to AWS Route53 DNS provider. Visible only when `dns_type`
| **ALB_NAME** (public) | Public Application Load Balancer name | `networking.balancer_public_name` |
| **ALB_NAME** (private) | Private Application Load Balancer name | `networking.balancer_private_name` |
| **ALB_RECONCILIATION_ENABLED** | Whether ALB reconciliation is enabled | `networking.alb_reconciliation_enabled` |
| **ALB_MAX_CAPACITY** | Maximum number of rules allowed on the HTTPS listener before blocking new scopes (default: 75, AWS limit: 100) | `networking.alb_max_capacity` |
| **ALB_MAX_TARGET_GROUPS** | Maximum number of target groups allowed on the ALB before blocking new deployments (default: 90, AWS limit: 100) | `networking.alb_max_target_groups` |
| **ALB_ROLLBACK_ON_RECONCILIATION_FAILURE** | Whether to automatically delete a broken ingress when ALB reconciliation fails, preventing sync poisoning of the entire ALB group (default: true) | `networking.alb_rollback_on_reconciliation_failure` |

#### Azure DNS

Expand Down
50 changes: 50 additions & 0 deletions k8s/deployment/rollback_failed_ingress
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash
# Rolls back a failed ingress to prevent sync poisoning of the ALB group.
# When a broken ingress is applied, the ALB Ingress Controller fails to
# reconcile ALL ingresses in the same group.name. By deleting the bad
# ingress, the rest of the group can resume normal reconciliation.
#
# Only deletes ingresses — deployments/services/secrets are left in place
# so that retries don't need to re-create them.
#
# Usage: this file is meant to be sourced (it uses top-level `return`).
#
# Reads:
#   ALB_ROLLBACK_ON_RECONCILIATION_FAILURE  "true" (default) enables rollback
#   DNS_TYPE                                rollback only runs for "route53"
#   CONTEXT                                 JSON; .scope.slug and .alb_name are read
#   SCOPE_ID, INGRESS_VISIBILITY            used to build the main ingress name
#   K8S_NAMESPACE                           namespace the ingresses live in
# Sets: SCOPE_SLUG, INGRESS_NAME, ALB_NAME, ADDITIONAL_INGRESSES
# Requires: a `log <level> <message>` function, kubectl and jq.
#
# The rollback is best-effort: individual kubectl failures are logged and
# counted, but never abort the sourcing script.

ALB_ROLLBACK_ON_RECONCILIATION_FAILURE="${ALB_ROLLBACK_ON_RECONCILIATION_FAILURE:-true}"

if [[ "$ALB_ROLLBACK_ON_RECONCILIATION_FAILURE" != "true" ]]; then
  log debug "📋 Ingress rollback disabled (ALB_ROLLBACK_ON_RECONCILIATION_FAILURE=$ALB_ROLLBACK_ON_RECONCILIATION_FAILURE), skipping"
  return 0
fi

if [[ "$DNS_TYPE" != "route53" ]]; then
  log debug "📋 DNS type is '$DNS_TYPE', ingress rollback only applies to route53, skipping"
  return 0
fi

# Without a namespace kubectl falls back to the context default, and an empty
# scope id turns the label selector into "scope_id=" — either way we could
# touch ingresses that do not belong to this scope, so refuse to continue.
if [[ -z "${K8S_NAMESPACE:-}" || -z "${SCOPE_ID:-}" ]]; then
  log warn "⚠️ Cannot roll back ingress: K8S_NAMESPACE and SCOPE_ID must both be set, skipping"
  return 0
fi

# `// empty` keeps a missing slug from becoming the literal string "null".
SCOPE_SLUG=$(jq -r '.scope.slug // empty' <<<"$CONTEXT")
ALB_NAME=$(jq -r '.alb_name' <<<"$CONTEXT")

if [[ -n "$SCOPE_SLUG" ]]; then
  INGRESS_NAME="k-8-s-$SCOPE_SLUG-$SCOPE_ID-$INGRESS_VISIBILITY"
  log warn "🔄 Rolling back ingress [$INGRESS_NAME] to prevent ALB sync poisoning..."
else
  # The main ingress name cannot be derived; the scope_id label lookup below
  # is then the only way to find this scope's ingresses.
  INGRESS_NAME=""
  log warn "🔄 Rolling back ingresses for scope [$SCOPE_ID] to prevent ALB sync poisoning (scope.slug missing from CONTEXT, using scope_id label only)..."
fi
log warn "📋 ALB group: $ALB_NAME | Namespace: $K8S_NAMESPACE"

# Deletes one ingress and logs the outcome.
# Arguments: $1 - ingress name, $2 - human-readable kind used in the log line
# Returns:   0 if kubectl succeeded (including "not found"), 1 otherwise
# kubectl's stderr is intentionally NOT discarded so the failure reason is
# visible next to the warning.
_rollback_delete_ingress() {
  local name="$1" label="$2"
  if kubectl delete ingress "$name" -n "$K8S_NAMESPACE" --ignore-not-found=true; then
    log info " ✅ Deleted $label: $name"
    return 0
  fi
  log warn " ⚠️ Could not delete $label: $name"
  return 1
}

rollback_failures=0

# Delete the main ingress
if [[ -n "$INGRESS_NAME" ]]; then
  _rollback_delete_ingress "$INGRESS_NAME" "ingress" \
    || rollback_failures=$((rollback_failures + 1))
fi

# Delete additional port ingresses for this scope (they share the same scope_id label)
if ! ADDITIONAL_INGRESSES=$(kubectl get ingress -n "$K8S_NAMESPACE" -l "scope_id=$SCOPE_ID" \
  -o jsonpath='{.items[*].metadata.name}'); then
  log warn " ⚠️ Could not list ingresses labelled scope_id=$SCOPE_ID; additional port ingresses were not cleaned up"
  ADDITIONAL_INGRESSES=""
  rollback_failures=$((rollback_failures + 1))
fi

# Word splitting is intentional: jsonpath emits whitespace-separated names.
for ing_name in $ADDITIONAL_INGRESSES; do
  # The main ingress carries the same label and was already handled above.
  if [[ "$ing_name" != "$INGRESS_NAME" ]]; then
    _rollback_delete_ingress "$ing_name" "additional port ingress" \
      || rollback_failures=$((rollback_failures + 1))
  fi
done

if ((rollback_failures > 0)); then
  log warn "🔄 Rollback complete with $rollback_failures failure(s) — ALB group '$ALB_NAME' may stay blocked until the remaining ingresses are removed"
else
  log warn "🔄 Rollback complete — other scopes on ALB group '$ALB_NAME' should resume normal reconciliation"
fi

# Do not leak the helper or the counter into the sourcing shell.
unset -f _rollback_delete_ingress
unset -v rollback_failures
194 changes: 194 additions & 0 deletions k8s/deployment/tests/rollback_failed_ingress.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
#!/usr/bin/env bats
# =============================================================================
# Unit tests for deployment/rollback_failed_ingress - ingress rollback on failure
# =============================================================================

setup() {
  # Repository root sits three directories above this test file.
  export PROJECT_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)"
  source "$PROJECT_ROOT/testing/assertions.sh"

  # Stand-in for the `log <level> <message>` helper the script under test
  # calls: "error" messages go to stderr, every other level to stdout.
  log() {
    case "$1" in
      error) echo "$2" >&2 ;;
      *) echo "$2" ;;
    esac
  }
  export -f log

  # Deployment context consumed by the script through jq.
  export CONTEXT='{
"scope": {
"slug": "my-app"
},
"alb_name": "k8s-test-alb"
}'

  # Defaults that enable the rollback path; individual tests override them.
  export DNS_TYPE="route53"
  export ALB_ROLLBACK_ON_RECONCILIATION_FAILURE="true"
  export K8S_NAMESPACE="test-namespace"
  export SCOPE_ID="scope-123"
  export INGRESS_VISIBILITY="internet-facing"
}

teardown() {
  # Drop the context variable exported by setup() once the test is over.
  unset -v CONTEXT
}

# =============================================================================
# Success Cases
# =============================================================================
@test "rollback_failed_ingress: deletes main ingress" {
  # The kubectl mock echoes every delete invocation so the test can verify
  # which ingress and namespace were actually targeted, not only the script's
  # own log lines. The label lookup returns nothing, so only the main ingress
  # is expected to be deleted.
  run bash -c "
    kubectl() {
      case \"\$1\" in
        delete)
          echo \"MOCK kubectl \$*\"
          return 0
          ;;
        get)
          echo ''
          return 0
          ;;
      esac
      return 0
    }
    export -f kubectl
    export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY'
    export DNS_TYPE='$DNS_TYPE' ALB_ROLLBACK_ON_RECONCILIATION_FAILURE='$ALB_ROLLBACK_ON_RECONCILIATION_FAILURE'
    export CONTEXT='$CONTEXT'
    source '$BATS_TEST_DIRNAME/../rollback_failed_ingress'
  "

  [ "$status" -eq 0 ]
  assert_contains "$output" "Rolling back ingress"
  assert_contains "$output" "k-8-s-my-app-scope-123-internet-facing"
  assert_contains "$output" "MOCK kubectl delete ingress k-8-s-my-app-scope-123-internet-facing -n test-namespace"
  assert_contains "$output" "Deleted ingress"
  assert_contains "$output" "Rollback complete"
}

@test "rollback_failed_ingress: deletes additional port ingresses" {
  # The label query also returns the main ingress (it carries the same
  # scope_id label), so this test exercises the script's filter that keeps
  # the main ingress from being processed a second time as an "additional"
  # one.
  run bash -c "
    kubectl() {
      case \"\$1\" in
        delete)
          echo \"deleted \$3\"
          return 0
          ;;
        get)
          if [[ \"\$*\" == *\"-l\"* ]]; then
            echo 'k-8-s-my-app-scope-123-internet-facing k-8-s-my-app-scope-123-http-8081-internet-facing k-8-s-my-app-scope-123-grpc-9090-internet-facing'
            return 0
          fi
          echo '{\"metadata\": {\"resourceVersion\": \"12345\"}}'
          return 0
          ;;
      esac
      return 0
    }
    export -f kubectl
    export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY'
    export DNS_TYPE='$DNS_TYPE' ALB_ROLLBACK_ON_RECONCILIATION_FAILURE='$ALB_ROLLBACK_ON_RECONCILIATION_FAILURE'
    export CONTEXT='$CONTEXT'
    source '$BATS_TEST_DIRNAME/../rollback_failed_ingress'
  "

  [ "$status" -eq 0 ]
  assert_contains "$output" "Deleted ingress: k-8-s-my-app-scope-123-internet-facing"
  assert_contains "$output" "Deleted additional port ingress: k-8-s-my-app-scope-123-http-8081-internet-facing"
  assert_contains "$output" "Deleted additional port ingress: k-8-s-my-app-scope-123-grpc-9090-internet-facing"
  # `|| false` makes a failing [[ ]] abort the test on every bash version.
  [[ "$output" != *"Deleted additional port ingress: k-8-s-my-app-scope-123-internet-facing"* ]] || false
}

# =============================================================================
# Skip Cases
# =============================================================================
@test "rollback_failed_ingress: skips when disabled" {
  # Any kubectl call prints a sentinel; the assertions below make sure the
  # sentinel never shows up, i.e. the script returned before touching the
  # cluster.
  run bash -c "
    kubectl() { echo 'should not be called'; return 1; }
    export -f kubectl
    export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY'
    export DNS_TYPE='$DNS_TYPE' ALB_ROLLBACK_ON_RECONCILIATION_FAILURE='false'
    export CONTEXT='$CONTEXT'
    source '$BATS_TEST_DIRNAME/../rollback_failed_ingress'
  "

  [ "$status" -eq 0 ]
  assert_contains "$output" "Ingress rollback disabled"
  # `|| false` makes a failing [[ ]] abort the test on every bash version.
  [[ "$output" != *"Rolling back ingress"* ]] || false
  [[ "$output" != *"should not be called"* ]] || false
}

@test "rollback_failed_ingress: skips for non-route53 DNS types" {
  # Any kubectl call prints a sentinel; the assertions below make sure the
  # sentinel never shows up, i.e. the script returned before touching the
  # cluster.
  run bash -c "
    kubectl() { echo 'should not be called'; return 1; }
    export -f kubectl
    export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY'
    export DNS_TYPE='azure' ALB_ROLLBACK_ON_RECONCILIATION_FAILURE='true'
    export CONTEXT='$CONTEXT'
    source '$BATS_TEST_DIRNAME/../rollback_failed_ingress'
  "

  [ "$status" -eq 0 ]
  assert_contains "$output" "ingress rollback only applies to route53"
  # `|| false` makes a failing [[ ]] abort the test on every bash version.
  [[ "$output" != *"Rolling back ingress"* ]] || false
  [[ "$output" != *"should not be called"* ]] || false
}

@test "rollback_failed_ingress: skips for external_dns DNS type" {
  # Any kubectl call prints a sentinel; the assertions below make sure the
  # sentinel never shows up, i.e. the script returned before touching the
  # cluster.
  run bash -c "
    kubectl() { echo 'should not be called'; return 1; }
    export -f kubectl
    export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY'
    export DNS_TYPE='external_dns' ALB_ROLLBACK_ON_RECONCILIATION_FAILURE='true'
    export CONTEXT='$CONTEXT'
    source '$BATS_TEST_DIRNAME/../rollback_failed_ingress'
  "

  [ "$status" -eq 0 ]
  assert_contains "$output" "ingress rollback only applies to route53"
  # `|| false` makes a failing [[ ]] abort the test on every bash version.
  [[ "$output" != *"Rolling back ingress"* ]] || false
  [[ "$output" != *"should not be called"* ]] || false
}

# =============================================================================
# Resilience Cases
# =============================================================================
@test "rollback_failed_ingress: handles missing ingress gracefully" {
  # Mock: every kubectl call succeeds; the label lookup prints an empty list.
  run bash -c "
    kubectl() {
      if [[ \"\$1\" == get ]]; then
        echo ''
      fi
      return 0
    }
    export -f kubectl
    export K8S_NAMESPACE='$K8S_NAMESPACE'
    export SCOPE_ID='$SCOPE_ID'
    export INGRESS_VISIBILITY='$INGRESS_VISIBILITY'
    export DNS_TYPE='$DNS_TYPE'
    export ALB_ROLLBACK_ON_RECONCILIATION_FAILURE='$ALB_ROLLBACK_ON_RECONCILIATION_FAILURE'
    export CONTEXT='$CONTEXT'
    source '$BATS_TEST_DIRNAME/../rollback_failed_ingress'
  "

  [ "$status" -eq 0 ]
  assert_contains "$output" "Rollback complete"
}

@test "rollback_failed_ingress: continues when kubectl delete fails" {
  # Every delete fails while the label lookup still returns one extra
  # ingress, so both the main and the additional-ingress failure paths run.
  # The script must report each failure and still finish with status 0.
  run bash -c "
    kubectl() {
      case \"\$1\" in
        delete)
          return 1
          ;;
        get)
          echo 'extra-ingress'
          return 0
          ;;
      esac
      return 0
    }
    export -f kubectl
    export K8S_NAMESPACE='$K8S_NAMESPACE' SCOPE_ID='$SCOPE_ID' INGRESS_VISIBILITY='$INGRESS_VISIBILITY'
    export DNS_TYPE='$DNS_TYPE' ALB_ROLLBACK_ON_RECONCILIATION_FAILURE='$ALB_ROLLBACK_ON_RECONCILIATION_FAILURE'
    export CONTEXT='$CONTEXT'
    source '$BATS_TEST_DIRNAME/../rollback_failed_ingress'
  "

  [ "$status" -eq 0 ]
  assert_contains "$output" "Could not delete ingress: k-8-s-my-app-scope-123-internet-facing"
  assert_contains "$output" "Could not delete additional port ingress: extra-ingress"
  assert_contains "$output" "Rollback complete"
  # Nothing was deleted, so no success line may be logged.
  # `|| false` makes a failing [[ ]] abort the test on every bash version.
  [[ "$output" != *"Deleted ingress:"* ]] || false
  [[ "$output" != *"Deleted additional port ingress:"* ]] || false
}
Loading
Loading