Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 41 additions & 5 deletions src/orchestrator-kubernetes/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -791,14 +791,21 @@ impl NamespacedOrchestrator for NamespacedKubernetesOrchestrator {
so min_domains will be ignored"
);
}
if availability_zones.is_some() && config.min_domains.is_some() {
warn!(
"topology spread has min_domains set but availability_zones \
constrains eligible topology domains via node affinity; \
minDomains will be ignored to avoid preventing pod scheduling"
);
}

let constraint = TopologySpreadConstraint {
label_selector: Some(ls),
min_domains: if config.soft {
None
} else {
config.min_domains
},
min_domains: topology_spread_min_domains(
config.soft,
availability_zones.is_some(),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is fine for clusters of our size, but if we were to pin to say 4 AZs, then having minDomains as 2 or 3 is totally reasonable.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Totally! It's possible this should just be a var on environmentd to make it more configurable.

config.min_domains,
),
max_skew: config.max_skew,
topology_key: "topology.kubernetes.io/zone".to_string(),
when_unsatisfiable: if config.soft {
Expand Down Expand Up @@ -1758,10 +1765,39 @@ impl Service for KubernetesService {
}
}

/// Returns the `minDomains` value for a `TopologySpreadConstraint`.
///
/// `minDomains` must be suppressed when spread is soft (Kubernetes rejects
/// `minDomains` with `ScheduleAnyway`) and when `availability_zones` is set
/// (node affinity already constrains eligible domains; if `minDomains` exceeds
/// the number of pinned zones the global minimum is treated as 0, causing all
/// but one replica to remain pending with `maxSkew=1`).
fn topology_spread_min_domains(
soft: bool,
az_pinned: bool,
min_domains: Option<i32>,
) -> Option<i32> {
if soft || az_pinned { None } else { min_domains }
}

#[cfg(test)]
mod tests {
use super::*;

#[mz_ore::test]
fn topology_spread_min_domains_suppression() {
// min_domains is kept when neither soft nor az-pinned
assert_eq!(topology_spread_min_domains(false, false, Some(3)), Some(3));
// min_domains is None when not set regardless of flags
assert_eq!(topology_spread_min_domains(false, false, None), None);
// suppressed when soft (Kubernetes rejects minDomains with ScheduleAnyway)
assert_eq!(topology_spread_min_domains(true, false, Some(3)), None);
// suppressed when availability_zones pins to specific AZs
assert_eq!(topology_spread_min_domains(false, true, Some(3)), None);
// suppressed when both
assert_eq!(topology_spread_min_domains(true, true, Some(3)), None);
}

#[mz_ore::test]
fn k8s_quantity_base10_large() {
let cases = &[
Expand Down
Loading