From fe899166ae873a74eebdca1eb10274eecb859c26 Mon Sep 17 00:00:00 2001 From: Jamo Luhrsen Date: Mon, 1 Jun 2026 13:45:17 -0700 Subject: [PATCH] OCPBUGS-85677: avoid Progressing during node reboot pod recreation The counter check (UpdatedNumberScheduled < CurrentNumberScheduled) correctly detects DaemonSet rollouts but also triggers during node reboots when pods are recreated without any DaemonSet spec change. Use generation > observedGeneration only as the short-lived signal that a CNO-initiated DaemonSet rollout has started, then keep tracking that rollout via the existing last-seen state while updated pods are still behind current pods. This filters transient pod churn after install without losing legitimate rollout progress once the DaemonSet controller has observed the new generation. Fixes false positives during MCO node reboots with ipsec machine configs. Signed-off-by Jamo Luhrsen Co-authored-by Claude Code --- pkg/controller/statusmanager/pod_status.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pkg/controller/statusmanager/pod_status.go b/pkg/controller/statusmanager/pod_status.go index 6e83c910f9..490650d3e8 100644 --- a/pkg/controller/statusmanager/pod_status.go +++ b/pkg/controller/statusmanager/pod_status.go @@ -94,18 +94,19 @@ func (status *StatusManager) SetFromPods() { for _, ds := range daemonSets { dsName := NewClusteredName(ds) dsState, hadState := daemonsetStates[dsName] - dsRolloutActive := !status.installComplete || ds.Status.UpdatedNumberScheduled < ds.Status.CurrentNumberScheduled + dsRolloutPending := ds.Generation > ds.Status.ObservedGeneration + dsRolloutActive := !status.installComplete || dsRolloutPending || (hadState && ds.Status.UpdatedNumberScheduled < ds.Status.CurrentNumberScheduled) dsProgressing := false if isNonCritical(ds) && ds.Status.NumberReady == 0 && !status.installComplete { progressing = append(progressing, fmt.Sprintf("DaemonSet %q is waiting for other operators to become ready", dsName.String())) dsProgressing = true - } else if ds.Status.UpdatedNumberScheduled < ds.Status.CurrentNumberScheduled { + } else if ds.Status.UpdatedNumberScheduled < ds.Status.CurrentNumberScheduled && dsRolloutActive { progressing = append(progressing, fmt.Sprintf("DaemonSet %q update is rolling out (%d out of %d updated)", dsName.String(), ds.Status.UpdatedNumberScheduled, ds.Status.CurrentNumberScheduled)) dsProgressing = true } else if ds.Status.NumberUnavailable > 0 { - if dsRolloutActive { + if dsRolloutActive || hadState { progressing = append(progressing, fmt.Sprintf("DaemonSet %q is not available (awaiting %d nodes)", dsName.String(), ds.Status.NumberUnavailable)) dsProgressing = true } @@ -123,7 +124,8 @@ func (status *StatusManager) SetFromPods() { var dsHung *string - if dsProgressing && !isNonCritical(ds) { + trackDSRollout := (dsProgressing && !isNonCritical(ds)) || dsRolloutPending + if trackDSRollout { reachedAvailableLevel = false if !hadState || !reflect.DeepEqual(dsState.LastSeenStatus, ds.Status) { @@ -132,13 +134,13 @@ func (status *StatusManager) SetFromPods() { } // Catch hung rollouts - if hadState && (time.Since(dsState.LastChangeTime)) > ProgressTimeout { + if dsProgressing && !isNonCritical(ds) && hadState && (time.Since(dsState.LastChangeTime)) > ProgressTimeout { hung = append(hung, fmt.Sprintf("DaemonSet %q rollout is not making progress - last change %s", dsName.String(), dsState.LastChangeTime.Format(time.RFC3339))) empty := "" dsHung = &empty } } - if dsProgressing && !isNonCritical(ds) { + if trackDSRollout { daemonsetStates[dsName] = dsState } else { delete(daemonsetStates, dsName)