# Patch overview (commentary only; the patch text below is unchanged).
#
# setup/stacks/mgmt.yaml
#   Adds a nodeSelector requiring the label platform.plural.sh/stack-runner: "true"
#   next to serviceAccount "stacks".
#
# terraform/clouds/azure/aks.tf
#   - node_pool_add: the active node group takes orchestrator_version from
#     local.node_orchestrator_version, the drain node group from
#     local.next_kubernetes_version.
#   - node_pool_add: new node_labels entries put "platform.plural.sh/stack-runner" = "true"
#     on the active group while local.upgrading is false and on the drain group while
#     it is true; the other group gets {}.
#   - module "aks": kubernetes_version now reads local.next_kubernetes_version and
#     orchestrator_version = local.node_orchestrator_version is added.
#   - Remaining changes in this file are formatting plus a trailing newline.
#
# terraform/clouds/azure/locals.tf
#   Adds local.next_kubernetes_version (var.next_kubernetes_version, or
#   var.kubernetes_version when that variable is ""), bases local.upgrading on it,
#   and adds local.node_orchestrator_version.
#
# terraform/clouds/azure/variables.tf
#   next_kubernetes_version: default changes from "1.34" to "" and a description is added.
#
# terraform/modules/clusters/azure/aks.tf
#   kubernetes_version now reads local.next_kubernetes_version; orchestrator_version is
#   set on the module and merged into every entry of var.node_pools.
#
# terraform/modules/clusters/azure/locals.tf, variables.tf
#   Same three locals as clouds/azure/locals.tf, plus a new next_kubernetes_version
#   variable defaulting to "".
#
# NOTE(review): node_orchestrator_version is
#   local.upgrading ? var.kubernetes_version : local.next_kubernetes_version
#   and local.upgrading is false only when the two versions are equal, so both
#   branches yield var.kubernetes_version. Confirm the conditional is intended.
# NOTE(review): full_node_pools merges node_pool_add over each var.node_pools entry.
#   Terraform merge() is shallow, so the node_labels value added here (including the
#   empty {}) would replace any node_labels a pool already defines - verify against
#   the callers' node_pools values.
# NOTE(review): the `!= ""` check does not cover null; presumably callers never pass
#   next_kubernetes_version = null - TODO confirm.
# NOTE(review): the clouds/azure/locals.tf hunk still carries
#   "\ No newline at end of file", whereas aks.tf gains its trailing newline here.
# NOTE(review): hunk headers and payload share physical lines below, so line breaks
#   appear to have been collapsed; presumably they must be restored before this
#   text can be applied as a patch.
diff --git a/setup/stacks/mgmt.yaml b/setup/stacks/mgmt.yaml index 1bf2abb..ddfa2c1 100644 --- a/setup/stacks/mgmt.yaml +++ b/setup/stacks/mgmt.yaml @@ -36,6 +36,8 @@ spec: labels: azure.workload.identity/use: "true" serviceAccount: "stacks" + nodeSelector: + platform.plural.sh/stack-runner: "true" [[ end ]] git: ref: main diff --git a/terraform/clouds/azure/aks.tf b/terraform/clouds/azure/aks.tf index bc0e4db..436084c 100644 --- a/terraform/clouds/azure/aks.tf +++ b/terraform/clouds/azure/aks.tf @@ -1,23 +1,29 @@ locals { node_pool_add = { (local.active_node_group) = { - orchestrator_version = var.kubernetes_version, - node_taints = local.upgrading ? ["platform.plural.sh/draining=true:NoSchedule"] : [], + orchestrator_version = local.node_orchestrator_version, + node_taints = local.upgrading ? ["platform.plural.sh/draining=true:NoSchedule"] : [], + node_labels = local.upgrading ? {} : { + "platform.plural.sh/stack-runner" = "true" + }, }, - (local.drain_node_group) = { - orchestrator_version = var.next_kubernetes_version, + (local.drain_node_group) = { + orchestrator_version = local.next_kubernetes_version, + node_labels = local.upgrading ? 
{ + "platform.plural.sh/stack-runner" = "true" + } : {}, } } - full_node_pools = {for k, v in var.node_pools: k => merge(v, try(lookup(local.node_pool_add, k), {})) if k != local.drain_node_group || local.upgrading == true} + full_node_pools = { for k, v in var.node_pools : k => merge(v, try(lookup(local.node_pool_add, k), {})) if k != local.drain_node_group || local.upgrading == true } } - module "aks" { source = "Azure/aks/azurerm" version = "9.2.0" - kubernetes_version = var.next_kubernetes_version + kubernetes_version = local.next_kubernetes_version + orchestrator_version = local.node_orchestrator_version cluster_name = var.cluster_name resource_group_name = local.resource_group.name prefix = var.cluster_name @@ -25,8 +31,8 @@ module "aks" { sku_tier = "Standard" rbac_aad = false vnet_subnet_id = azurerm_subnet.network.id - node_pools = {for name, pool in local.full_node_pools : name => merge(pool, {name = name, vnet_subnet_id = azurerm_subnet.network.id})} - + node_pools = { for name, pool in local.full_node_pools : name => merge(pool, { name = name, vnet_subnet_id = azurerm_subnet.network.id }) } + ebpf_data_plane = "cilium" network_plugin_mode = "overlay" network_plugin = "azure" @@ -35,4 +41,4 @@ module "aks" { workload_identity_enabled = var.workload_identity_enabled oidc_issuer_enabled = var.workload_identity_enabled -} \ No newline at end of file +} diff --git a/terraform/clouds/azure/locals.tf b/terraform/clouds/azure/locals.tf index eb4b036..416b2c1 100644 --- a/terraform/clouds/azure/locals.tf +++ b/terraform/clouds/azure/locals.tf @@ -7,9 +7,12 @@ locals { rg = var.create_resource_group ? azurerm_resource_group.main[0] : data.azurerm_resource_group.main[0] db_url = format("postgresql://console:%s@%s:5432/console", random_password.password.result, try(azurerm_postgresql_flexible_server.postgres[0].fqdn, "")) - upgrading = var.kubernetes_version != var.next_kubernetes_version + next_kubernetes_version = var.next_kubernetes_version != "" ? 
var.next_kubernetes_version : var.kubernetes_version + upgrading = var.kubernetes_version != local.next_kubernetes_version split_vsn = [ for i in split(".", var.kubernetes_version): tonumber(i) ] vsn_even = ((tonumber(local.split_vsn[0]) * 100 + tonumber(local.split_vsn[1])) % 2) == 0 active_node_group = local.vsn_even ? "blue" : "green" drain_node_group = local.vsn_even ? "green" : "blue" + # AKS cannot upgrade CP and node pools in one apply when both versions change. + node_orchestrator_version = local.upgrading ? var.kubernetes_version : local.next_kubernetes_version } \ No newline at end of file diff --git a/terraform/clouds/azure/variables.tf b/terraform/clouds/azure/variables.tf index 524785d..a79ae8e 100644 --- a/terraform/clouds/azure/variables.tf +++ b/terraform/clouds/azure/variables.tf @@ -19,8 +19,9 @@ variable "kubernetes_version" { } variable "next_kubernetes_version" { - type = string - default = "1.34" + type = string + default = "" + description = "AKS control plane target; leave empty to match kubernetes_version." 
} variable "create_resource_group" { diff --git a/terraform/modules/clusters/azure/aks.tf b/terraform/modules/clusters/azure/aks.tf index 8107819..1679238 100644 --- a/terraform/modules/clusters/azure/aks.tf +++ b/terraform/modules/clusters/azure/aks.tf @@ -2,7 +2,8 @@ module "aks" { source = "Azure/aks/azurerm" version = "9.2.0" - kubernetes_version = var.kubernetes_version + kubernetes_version = local.next_kubernetes_version + orchestrator_version = local.node_orchestrator_version cluster_name = var.cluster resource_group_name = data.azurerm_resource_group.default.name prefix = var.cluster @@ -10,7 +11,13 @@ module "aks" { sku_tier = "Standard" rbac_aad = false vnet_subnet_id = local.network.sn_subnet_id - node_pools = {for name, pool in var.node_pools : name => merge(pool, {name = name, vnet_subnet_id = local.network.sn_subnet_id})} + node_pools = { + for name, pool in var.node_pools : name => merge(pool, { + name = name + vnet_subnet_id = local.network.sn_subnet_id + orchestrator_version = local.node_orchestrator_version + }) + } ebpf_data_plane = "cilium" network_plugin_mode = "overlay" diff --git a/terraform/modules/clusters/azure/locals.tf b/terraform/modules/clusters/azure/locals.tf index febd546..6c1dc6b 100644 --- a/terraform/modules/clusters/azure/locals.tf +++ b/terraform/modules/clusters/azure/locals.tf @@ -1,4 +1,9 @@ locals { - identity = jsondecode(data.plural_service_context.identity.configuration) - network = jsondecode(data.plural_service_context.network.configuration) + identity = jsondecode(data.plural_service_context.identity.configuration) + network = jsondecode(data.plural_service_context.network.configuration) + # Empty next_kubernetes_version means in sync with kubernetes_version (safe before scaffolds passes both). + next_kubernetes_version = var.next_kubernetes_version != "" ? 
var.next_kubernetes_version : var.kubernetes_version + upgrading = var.kubernetes_version != local.next_kubernetes_version + # AKS upgrades control plane and node pools in separate applies; see clouds/azure/aks.tf. + node_orchestrator_version = local.upgrading ? var.kubernetes_version : local.next_kubernetes_version } diff --git a/terraform/modules/clusters/azure/variables.tf b/terraform/modules/clusters/azure/variables.tf index a032c50..6feda40 100644 --- a/terraform/modules/clusters/azure/variables.tf +++ b/terraform/modules/clusters/azure/variables.tf @@ -16,6 +16,12 @@ variable "kubernetes_version" { default = "1.34" } +variable "next_kubernetes_version" { + type = string + default = "" + description = "AKS control plane target; leave empty to match kubernetes_version." +} + variable "resource_group_name" { type = string default = "plural"