diff --git a/infrastructure/commons/istio/main.tf b/infrastructure/commons/istio/main.tf
index fd69f839..3f8c27c5 100644
--- a/infrastructure/commons/istio/main.tf
+++ b/infrastructure/commons/istio/main.tf
@@ -45,6 +45,24 @@ resource "helm_release" "istiod" {
   reuse_values      = false
   dependency_update = true
   max_history       = 10
+
+  # Enforce HA on istiod. The chart's HPA is enabled by default with
+  # autoscaleMin=1, so setting only replicaCount is not enough — the HPA
+  # would scale it back to 1 and re-block any node drain (istiod PDB has
+  # minAvailable=1). Setting autoscaleMin locks in the floor.
+  # NOTE(review): the chart also exposes pilot.autoscaleMax (upstream default
+  # is believed to be 5 — confirm). If var.istiod_replicas exceeds it, the HPA
+  # would end up with min > max; verify, or set autoscaleMax alongside.
+  set = [
+    {
+      name  = "pilot.replicaCount"
+      value = var.istiod_replicas
+    },
+    {
+      name  = "pilot.autoscaleMin"
+      value = var.istiod_replicas
+    },
+  ]
 }
 
 # Setup Istio Gateway using Helm
diff --git a/infrastructure/commons/istio/variables.tf b/infrastructure/commons/istio/variables.tf
index 657b1f0a..875cc3b1 100644
--- a/infrastructure/commons/istio/variables.tf
+++ b/infrastructure/commons/istio/variables.tf
@@ -20,6 +20,19 @@ variable "istiod_version" {
   default     = "1.27.1"
 }
 
+variable "istiod_replicas" {
+  description = "Number of istiod replicas. Default is 1 to preserve the previous behavior of this module for existing consumers; set to 2 (recommended) to let the pilot deployment tolerate node drains — the istiod chart installs a PodDisruptionBudget with minAvailable=1, and a single-replica istiod therefore blocks node rolling updates (e.g. EKS AMI bumps). This value is applied to both pilot.replicaCount and pilot.autoscaleMin; without the autoscaleMin override, the HPA (enabled by default with autoscaleMin=1) would scale back to 1 replica shortly after install."
+  type        = number
+  default     = 1
+
+  validation {
+    # `number` also admits fractional values; a replica count must be a whole
+    # number, so reject e.g. 1.5 here rather than passing it on to the Helm release.
+    condition     = var.istiod_replicas >= 1 && floor(var.istiod_replicas) == var.istiod_replicas
+    error_message = "istiod_replicas must be a whole number greater than or equal to 1."
+  }
+}
+
 ###############################################################################
 # SERVICE CONFIGURATION
 ###############################################################################