From a177234fd2e26cecc6941fd8bd9901e798c02121 Mon Sep 17 00:00:00 2001 From: RishabhSaini Date: Fri, 27 Jun 2025 14:57:34 -0400 Subject: [PATCH 1/4] helpers: compute systemdUnits that added and changed between Ign configs update: only write added or updated systemd units unless forcefile exists then rewrite all systemd units on_disk_validation: check if systemd units in config enabled correctly tests: update MCN condition transition test to account for quick transitions through 'Unknown' state (cherry picked from commit 4512ec46f7b13a6e85588bbf480c4d029990724a) --- pkg/controller/common/helpers.go | 36 ++++++++++++++++--------- pkg/daemon/on_disk_validation.go | 31 ++++++++++++++++++++- pkg/daemon/update.go | 46 +++++++++++++++++++++++++++----- 3 files changed, 93 insertions(+), 20 deletions(-) diff --git a/pkg/controller/common/helpers.go b/pkg/controller/common/helpers.go index fd8cd56481..43ea36057d 100644 --- a/pkg/controller/common/helpers.go +++ b/pkg/controller/common/helpers.go @@ -1074,12 +1074,25 @@ func CalculateConfigFileDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []st return diffFileSet } -// CalculateConfigUnitDiffs compares the units present in two ignition configurations and returns the list of units -// that are different between them -// -//nolint:dupl -func CalculateConfigUnitDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []string { - // Go through the units and see what is new or different +type UnitDiff struct { + Added []ign3types.Unit + Removed []ign3types.Unit + Updated []ign3types.Unit +} + +// GetChangedConfigUnitsByType compares the units present in two ignition configurations, one +// old config and the other new, a struct with units mapped to the type of change: +// - New unit added: "Added" +// - Unit previously existing removed: "Removed" +// - Existing unit changed in some way: "Updated" +func GetChangedConfigUnitsByType(oldIgnConfig, newIgnConfig *ign3types.Config) (unitDiffs UnitDiff) { + diffUnit := UnitDiff{ + Added: []ign3types.Unit{}, + Removed: []ign3types.Unit{}, + Updated: []ign3types.Unit{}, + } + + // Get the sets of the old and new units from the ignition configurations oldUnitSet := make(map[string]ign3types.Unit) for _, u := range oldIgnConfig.Systemd.Units { oldUnitSet[u.Name] = u @@ -1088,26 +1101,25 @@ func CalculateConfigUnitDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []st for _, u := range newIgnConfig.Systemd.Units { newUnitSet[u.Name] = u } - diffUnitSet := []string{} // First check if any units were removed for unit := range oldUnitSet { _, ok := newUnitSet[unit] if !ok { - diffUnitSet = append(diffUnitSet, unit) + diffUnit.Removed = append(diffUnit.Removed, oldUnitSet[unit]) } } - // Now check if any units were added/changed + // Now check if any units were added or updated for name, newUnit := range newUnitSet { oldUnit, ok := oldUnitSet[name] if !ok { - diffUnitSet = append(diffUnitSet, name) + diffUnit.Added = append(diffUnit.Added, newUnitSet[name]) } else if !reflect.DeepEqual(oldUnit, newUnit) { - diffUnitSet = append(diffUnitSet, name) + diffUnit.Updated = append(diffUnit.Updated, newUnitSet[name]) } } - return diffUnitSet + return diffUnit } // NewIgnFile returns a simple ignition3 file from just path and file contents. diff --git a/pkg/daemon/on_disk_validation.go b/pkg/daemon/on_disk_validation.go index b06a344c62..7526b5410a 100644 --- a/pkg/daemon/on_disk_validation.go +++ b/pkg/daemon/on_disk_validation.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path/filepath" + "strings" ign2types "github.com/coreos/ignition/config/v2_2/types" ign3types "github.com/coreos/ignition/v2/config/v3_4/types" @@ -100,7 +101,12 @@ func checkV3Unit(unit ign3types.Unit, systemdPath string) error { return nil } - return checkFileContentsAndMode(path, []byte(*unit.Contents), defaultFilePermissions) + err := checkFileContentsAndMode(path, []byte(*unit.Contents), defaultFilePermissions) + if err != nil { + return err + } + + return checkUnitEnabled(unit.Name, unit.Enabled) } // checkV3Units validates the contents of all the units in the @@ -230,6 +236,29 @@ func checkV2Files(files []ign2types.File) error { return nil } +// checkUnitEnabled checks whether a systemd unit is enabled as expected. +func checkUnitEnabled(name string, expectedEnabled *bool) error { + if expectedEnabled == nil { + return nil + } + outBytes, _ := runGetOut("systemctl", "is-enabled", name) + out := strings.TrimSpace(string(outBytes)) + + switch { + case *expectedEnabled: + // If expected to be enabled, reject known "not enabled" states + if out == "disabled" || out == "masked" || out == "masked-runtime" || out == "not-found" { + return fmt.Errorf("unit %q expected enabled=true, but systemd reports %q", name, out) + } + case !*expectedEnabled: + // If expected to be disabled, reject any of the enabled-like states + if out == "enabled" || out == "enabled-runtime" { + return fmt.Errorf("unit %q expected enabled=false, but systemd reports %q", name, out) + } + } + return nil +} + // checkFileContentsAndMode reads the file from the filepath and compares its // contents and mode with the expectedContent and mode parameters. It logs an // error in case of an error or mismatch and returns the status of the diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index b5b0c300d8..a2fcb2f25a 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -12,6 +12,7 @@ import ( "path/filepath" "reflect" goruntime "runtime" + "slices" "strconv" "strings" "sync" @@ -1211,7 +1212,14 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi logSystem("Starting update from %s to %s: %+v", oldConfigName, newConfigName, diff) diffFileSet := ctrlcommon.CalculateConfigFileDiffs(&oldIgnConfig, &newIgnConfig) - diffUnitSet := ctrlcommon.CalculateConfigUnitDiffs(&oldIgnConfig, &newIgnConfig) + // Get the added and updated units + unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig) + addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated) + // Get the names of all units changed in some way (added, removed, or updated) + var allChangedUnitNames []string + for _, unit := range append(addedOrChangedUnits, unitDiff.Removed...) { + allChangedUnitNames = append(allChangedUnitNames, unit.Name) + } var fg featuregates.FeatureGate @@ -1225,6 +1233,10 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi } } + // Check for forcefile before calculatePostConfigChange* functions delete it. + // This is needed for updateFiles to know whether to write all units (OCPBUGS-74692). + forceFilePresent := forceFileExists() + var nodeDisruptionActions []opv1.NodeDisruptionPolicyStatusAction var actions []string // If FeatureGateNodeDisruptionPolicy is set, calculate NodeDisruptionPolicy based actions for this MC diff @@ -1342,13 +1354,13 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi } // update files on disk that need updating - if err := dn.updateFiles(oldIgnConfig, newIgnConfig, skipCertificateWrite); err != nil { + if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite, forceFilePresent); err != nil { return err } defer func() { if retErr != nil { - if err := dn.updateFiles(newIgnConfig, oldIgnConfig, skipCertificateWrite); err != nil { + if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, skipCertificateWrite, false); err != nil { errs := kubeErrs.NewAggregate([]error{err, retErr}) retErr = fmt.Errorf("error rolling back files writes: %w", errs) return @@ -1473,15 +1485,21 @@ func (dn *Daemon) updateHypershift(oldConfig, newConfig *mcfgv1.MachineConfig, d return fmt.Errorf("parsing new Ignition config failed: %w", err) } + unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig) + addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated) + + // Check for forcefile to support config drift recovery (OCPBUGS-74692) + forceFilePresent := forceFileExists() + // update files on disk that need updating // We should't skip the certificate write in HyperShift since it does not run the extra daemon process - if err := dn.updateFiles(oldIgnConfig, newIgnConfig, false); err != nil { + if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, false, forceFilePresent); err != nil { return err } defer func() { if retErr != nil { - if err := dn.updateFiles(newIgnConfig, oldIgnConfig, false); err != nil { + if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, false, false); err != nil { errs := kubeErrs.NewAggregate([]error{err, retErr}) retErr = fmt.Errorf("error rolling back files writes: %w", errs) return @@ -1971,12 +1989,26 @@ func (dn *CoreOSDaemon) switchKernel(oldConfig, newConfig *mcfgv1.MachineConfig) // whatever has been written is picked up by the appropriate daemons, if // required. in particular, a daemon-reload and restart for any unit files // touched. -func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, skipCertificateWrite bool) error { +func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, addedOrChangedUnits []ign3types.Unit, skipCertificateWrite, forceFilePresent bool) error { klog.Info("Updating files") if err := dn.writeFiles(newIgnConfig.Storage.Files, skipCertificateWrite); err != nil { return err } - if err := dn.writeUnits(newIgnConfig.Systemd.Units); err != nil { + + // With OCPBUGS-58023, we updated this flow to only write units that were either added or + // updated. As can be seen in OCPBUGS-74692, this impacted the traditional method to recover + // from config drifts with systemd units. It makes the `touch /run/machine-config-daemon-force` + // command useless since the new flow does not rewrite all files, only the ones that have been + // added or changed with the latest MC. To keep the fix for OCPBUGS-58023 and allow continue + // supporting the traditional config drift recovery for systemd units, all units should be + // written when a forcefile exists. + unitsToWrite := addedOrChangedUnits + if forceFilePresent { + klog.Info("Forcefile exists, writing all units") + unitsToWrite = newIgnConfig.Systemd.Units + } + + if err := dn.writeUnits(unitsToWrite); err != nil { return err } return dn.deleteStaleData(oldIgnConfig, newIgnConfig) From 31382a19c34bbadd964c3e78181b2ec2ec9d0ff7 Mon Sep 17 00:00:00 2001 From: Isabella Janssen Date: Tue, 24 Feb 2026 09:33:33 -0500 Subject: [PATCH 2/4] fix --- pkg/daemon/update.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index a2fcb2f25a..bfdced0be2 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -991,14 +991,28 @@ func (dn *Daemon) updateOnClusterLayering(oldConfig, newConfig *mcfgv1.MachineCo } } + diffFileSet := ctrlcommon.CalculateConfigFileDiffs(&oldIgnConfig, &newIgnConfig) + // Get the added and updated units + unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig) + addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated) + // Get the names of all units changed in some way (added, removed, or updated) + var allChangedUnitNames []string + for _, unit := range append(addedOrChangedUnits, unitDiff.Removed...) { + allChangedUnitNames = append(allChangedUnitNames, unit.Name) + } + + // Check for forcefile before calculatePostConfigChange* functions delete it. + // This is needed for updateFiles to know whether to write all units (OCPBUGS-74692). + forceFilePresent := forceFileExists() + // update files on disk that need updating - if err := dn.updateFiles(oldIgnConfig, newIgnConfig, skipCertificateWrite); err != nil { + if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite, forceFilePresent); err != nil { return err } defer func() { if retErr != nil { - if err := dn.updateFiles(newIgnConfig, oldIgnConfig, skipCertificateWrite); err != nil { + if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite, forceFilePresent); err != nil { errs := kubeErrs.NewAggregate([]error{err, retErr}) retErr = fmt.Errorf("error rolling back files writes: %w", errs) return From a4575feca7f279c7c25b204a51c13fa639dc2c28 Mon Sep 17 00:00:00 2001 From: Isabella Janssen Date: Tue, 24 Feb 2026 09:46:37 -0500 Subject: [PATCH 3/4] fix --- pkg/daemon/update.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index bfdced0be2..f8b2ee14de 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -991,7 +991,6 @@ func (dn *Daemon) updateOnClusterLayering(oldConfig, newConfig *mcfgv1.MachineCo } } - diffFileSet := ctrlcommon.CalculateConfigFileDiffs(&oldIgnConfig, &newIgnConfig) // Get the added and updated units unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig) addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated) @@ -1255,7 +1254,7 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi var actions []string // If FeatureGateNodeDisruptionPolicy is set, calculate NodeDisruptionPolicy based actions for this MC diff if fg != nil && fg.Enabled(features.FeatureGateNodeDisruptionPolicy) { - nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, diffUnitSet) + nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, allChangedUnitNames) } else { actions, err = calculatePostConfigChangeAction(diff, diffFileSet) } From 8ee786dd196dcbd205c26dc2bb30f80b44d9035b Mon Sep 17 00:00:00 2001 From: Isabella Janssen Date: Tue, 24 Feb 2026 10:09:09 -0500 Subject: [PATCH 4/4] fix --- pkg/daemon/update.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index f8b2ee14de..e1d590c0ae 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -994,11 +994,6 @@ func (dn *Daemon) updateOnClusterLayering(oldConfig, newConfig *mcfgv1.MachineCo // Get the added and updated units unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig) addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated) - // Get the names of all units changed in some way (added, removed, or updated) - var allChangedUnitNames []string - for _, unit := range append(addedOrChangedUnits, unitDiff.Removed...) { - allChangedUnitNames = append(allChangedUnitNames, unit.Name) - } // Check for forcefile before calculatePostConfigChange* functions delete it. // This is needed for updateFiles to know whether to write all units (OCPBUGS-74692).