diff --git a/src/orchestrator-kubernetes/src/lib.rs b/src/orchestrator-kubernetes/src/lib.rs index 474beb3604228..62c9cb5a2e9db 100644 --- a/src/orchestrator-kubernetes/src/lib.rs +++ b/src/orchestrator-kubernetes/src/lib.rs @@ -791,14 +791,21 @@ impl NamespacedOrchestrator for NamespacedKubernetesOrchestrator { so min_domains will be ignored" ); } + if availability_zones.is_some() && config.min_domains.is_some() { + warn!( + "topology spread has min_domains set but availability_zones \ + constrains eligible topology domains via node affinity; \ + minDomains will be ignored to avoid preventing pod scheduling" + ); + } let constraint = TopologySpreadConstraint { label_selector: Some(ls), - min_domains: if config.soft { - None - } else { - config.min_domains - }, + min_domains: topology_spread_min_domains( + config.soft, + availability_zones.is_some(), + config.min_domains, + ), max_skew: config.max_skew, topology_key: "topology.kubernetes.io/zone".to_string(), when_unsatisfiable: if config.soft { @@ -1758,10 +1765,39 @@ impl Service for KubernetesService { } } +/// Returns the `minDomains` value for a `TopologySpreadConstraint`. +/// +/// `minDomains` must be suppressed when spread is soft (Kubernetes rejects +/// `minDomains` with `ScheduleAnyway`) and when `availability_zones` is set +/// (node affinity already constrains eligible domains; if `minDomains` exceeds +/// the number of pinned zones the global minimum is treated as 0, causing all +/// but one replica to remain pending with `maxSkew=1`). +fn topology_spread_min_domains( + soft: bool, + az_pinned: bool, + min_domains: Option, +) -> Option { + if soft || az_pinned { None } else { min_domains } +} + #[cfg(test)] mod tests { use super::*; + #[mz_ore::test] + fn topology_spread_min_domains_suppression() { + // min_domains is kept when neither soft nor az-pinned + assert_eq!(topology_spread_min_domains(false, false, Some(3)), Some(3)); + // min_domains is None when not set regardless of flags + assert_eq!(topology_spread_min_domains(false, false, None), None); + // suppressed when soft (Kubernetes rejects minDomains with ScheduleAnyway) + assert_eq!(topology_spread_min_domains(true, false, Some(3)), None); + // suppressed when availability_zones pins to specific AZs + assert_eq!(topology_spread_min_domains(false, true, Some(3)), None); + // suppressed when both + assert_eq!(topology_spread_min_domains(true, true, Some(3)), None); + } + #[mz_ore::test] fn k8s_quantity_base10_large() { let cases = &[