From 989e3910eb53c0e94d0b21b8cbd1de0f7e9d4dd4 Mon Sep 17 00:00:00 2001 From: Justin Bradfield Date: Mon, 11 May 2026 15:39:14 -0500 Subject: [PATCH 1/3] orchestrator-kubernetes: suppress minDomains when availability_zones is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `availability_zones` pins pods to specific AZs via node affinity, the number of eligible topology domains is constrained. If `minDomains` exceeds the number of pinned zones, Kubernetes treats the global minimum as 0, and with `maxSkew=1` only one pod can be scheduled — leaving additional replicas stuck pending. Fix by suppressing `minDomains` in the topology spread constraint whenever `availability_zones` is set, matching the existing behavior for soft spread constraints. Fixes CLO-74 Co-Authored-By: Claude Sonnet 4.6 --- src/orchestrator-kubernetes/src/lib.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/orchestrator-kubernetes/src/lib.rs b/src/orchestrator-kubernetes/src/lib.rs index 474beb3604228..84bef97e5cb9b 100644 --- a/src/orchestrator-kubernetes/src/lib.rs +++ b/src/orchestrator-kubernetes/src/lib.rs @@ -791,10 +791,17 @@ impl NamespacedOrchestrator for NamespacedKubernetesOrchestrator { so min_domains will be ignored" ); } + if availability_zones.is_some() && config.min_domains.is_some() { + warn!( + "topology spread has min_domains set but availability_zones \ + constrains eligible topology domains via node affinity; \ + minDomains will be ignored to avoid preventing pod scheduling" + ); + } let constraint = TopologySpreadConstraint { label_selector: Some(ls), - min_domains: if config.soft { + min_domains: if config.soft || availability_zones.is_some() { None } else { config.min_domains From 3eefbacc6191e174cd1f99abfd47e975cd518bdf Mon Sep 17 00:00:00 2001 From: Justin Bradfield Date: Mon, 11 May 2026 15:57:06 -0500 Subject: [PATCH 2/3] orchestrator-kubernetes: add test for minDomains suppression logic 
Extract topology_spread_min_domains helper and add unit test covering the four suppression cases: soft spread, az-pinned, both, and neither. Co-Authored-By: Claude Sonnet 4.6 --- src/orchestrator-kubernetes/src/lib.rs | 46 +++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/src/orchestrator-kubernetes/src/lib.rs b/src/orchestrator-kubernetes/src/lib.rs index 84bef97e5cb9b..e0b59698cb484 100644 --- a/src/orchestrator-kubernetes/src/lib.rs +++ b/src/orchestrator-kubernetes/src/lib.rs @@ -801,11 +801,11 @@ impl NamespacedOrchestrator for NamespacedKubernetesOrchestrator { let constraint = TopologySpreadConstraint { label_selector: Some(ls), - min_domains: if config.soft || availability_zones.is_some() { - None - } else { - config.min_domains - }, + min_domains: topology_spread_min_domains( + config.soft, + availability_zones.is_some(), + config.min_domains, + ), max_skew: config.max_skew, topology_key: "topology.kubernetes.io/zone".to_string(), when_unsatisfiable: if config.soft { @@ -1765,10 +1765,46 @@ impl Service for KubernetesService { } } +/// Returns the `minDomains` value for a `TopologySpreadConstraint`. +/// +/// `minDomains` must be suppressed when spread is soft (Kubernetes rejects +/// `minDomains` with `ScheduleAnyway`) and when `availability_zones` is set +/// (node affinity already constrains eligible domains; if `minDomains` exceeds +/// the number of pinned zones the global minimum is treated as 0, causing all +/// but one replica to remain pending with `maxSkew=1`). 
+fn topology_spread_min_domains( + soft: bool, + az_pinned: bool, + min_domains: Option<i32>, +) -> Option<i32> { + if soft || az_pinned { + None + } else { + min_domains + } +} + #[cfg(test)] mod tests { use super::*; + #[mz_ore::test] + fn topology_spread_min_domains_suppression() { + // min_domains is kept when neither soft nor az-pinned + assert_eq!( + topology_spread_min_domains(false, false, Some(3)), + Some(3) + ); + // min_domains is None when not set regardless of flags + assert_eq!(topology_spread_min_domains(false, false, None), None); + // suppressed when soft (Kubernetes rejects minDomains with ScheduleAnyway) + assert_eq!(topology_spread_min_domains(true, false, Some(3)), None); + // suppressed when availability_zones pins to specific AZs + assert_eq!(topology_spread_min_domains(false, true, Some(3)), None); + // suppressed when both + assert_eq!(topology_spread_min_domains(true, true, Some(3)), None); + } + #[mz_ore::test] fn k8s_quantity_base10_large() { let cases = &[ From f47e6e4d77d63ee27eff3d4786b3319bf2d4926b Mon Sep 17 00:00:00 2001 From: Justin Bradfield Date: Mon, 11 May 2026 16:04:08 -0500 Subject: [PATCH 3/3] orchestrator-kubernetes: fix rustfmt Co-Authored-By: Claude Sonnet 4.6 --- src/orchestrator-kubernetes/src/lib.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/orchestrator-kubernetes/src/lib.rs b/src/orchestrator-kubernetes/src/lib.rs index e0b59698cb484..62c9cb5a2e9db 100644 --- a/src/orchestrator-kubernetes/src/lib.rs +++ b/src/orchestrator-kubernetes/src/lib.rs @@ -1777,11 +1777,7 @@ fn topology_spread_min_domains( az_pinned: bool, min_domains: Option<i32>, ) -> Option<i32> { - if soft || az_pinned { - None - } else { - min_domains - } + if soft || az_pinned { None } else { min_domains } } #[cfg(test)] @@ -1791,10 +1787,7 @@ mod tests { #[mz_ore::test] fn topology_spread_min_domains_suppression() { // min_domains is kept when neither soft nor az-pinned - assert_eq!( - topology_spread_min_domains(false, false, 
Some(3)), - Some(3) - ); + assert_eq!(topology_spread_min_domains(false, false, Some(3)), Some(3)); // min_domains is None when not set regardless of flags assert_eq!(topology_spread_min_domains(false, false, None), None); // suppressed when soft (Kubernetes rejects minDomains with ScheduleAnyway)