diff --git a/go.mod b/go.mod index e74c2fcd71..415432db60 100644 --- a/go.mod +++ b/go.mod @@ -24,7 +24,6 @@ require ( github.com/fsnotify/fsnotify v1.7.0 github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32 github.com/golangci/golangci-lint v1.59.1 - github.com/google/go-cmp v0.6.0 github.com/google/renameio v0.1.0 github.com/imdario/mergo v0.3.16 github.com/opencontainers/go-digest v1.0.0 @@ -93,6 +92,7 @@ require ( github.com/golangci/plugin-module-register v0.1.1 // indirect github.com/google/btree v1.0.1 // indirect github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect @@ -289,7 +289,7 @@ require ( github.com/securego/gosec/v2 v2.20.1-0.20240525090044-5f0084eb01a9 // indirect github.com/shazow/go-diff v0.0.0-20160112020656-b6b7b6733b8c // indirect github.com/sigstore/sigstore v1.7.5 // indirect - github.com/sirupsen/logrus v1.9.3 + github.com/sirupsen/logrus v1.9.3 // indirect github.com/sivchari/containedctx v1.0.3 // indirect github.com/sivchari/tenv v1.7.1 // indirect github.com/sonatard/noctx v0.0.2 // indirect diff --git a/pkg/controller/common/helpers.go b/pkg/controller/common/helpers.go index fd8cd56481..43ea36057d 100644 --- a/pkg/controller/common/helpers.go +++ b/pkg/controller/common/helpers.go @@ -1074,12 +1074,25 @@ func CalculateConfigFileDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []st return diffFileSet } -// CalculateConfigUnitDiffs compares the units present in two ignition configurations and returns the list of units -// that are different between them -// -//nolint:dupl -func CalculateConfigUnitDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []string { - // Go through the units and see what is new or different +type UnitDiff struct { + Added []ign3types.Unit + Removed []ign3types.Unit + Updated []ign3types.Unit +} + +// GetChangedConfigUnitsByType compares the units present in two ignition configurations, one +// old config and the other new, a struct with units mapped to the type of change: +// - New unit added: "Added" +// - Unit previously existing removed: "Removed" +// - Existing unit changed in some way: "Updated" +func GetChangedConfigUnitsByType(oldIgnConfig, newIgnConfig *ign3types.Config) (unitDiffs UnitDiff) { + diffUnit := UnitDiff{ + Added: []ign3types.Unit{}, + Removed: []ign3types.Unit{}, + Updated: []ign3types.Unit{}, + } + + // Get the sets of the old and new units from the ignition configurations oldUnitSet := make(map[string]ign3types.Unit) for _, u := range oldIgnConfig.Systemd.Units { oldUnitSet[u.Name] = u @@ -1088,26 +1101,25 @@ func CalculateConfigUnitDiffs(oldIgnConfig, newIgnConfig *ign3types.Config) []st for _, u := range newIgnConfig.Systemd.Units { newUnitSet[u.Name] = u } - diffUnitSet := []string{} // First check if any units were removed for unit := range oldUnitSet { _, ok := newUnitSet[unit] if !ok { - diffUnitSet = append(diffUnitSet, unit) + diffUnit.Removed = append(diffUnit.Removed, oldUnitSet[unit]) } } - // Now check if any units were added/changed + // Now check if any units were added or updated for name, newUnit := range newUnitSet { oldUnit, ok := oldUnitSet[name] if !ok { - diffUnitSet = append(diffUnitSet, name) + diffUnit.Added = append(diffUnit.Added, newUnitSet[name]) } else if !reflect.DeepEqual(oldUnit, newUnit) { - diffUnitSet = append(diffUnitSet, name) + diffUnit.Updated = append(diffUnit.Updated, newUnitSet[name]) } } - return diffUnitSet + return diffUnit } // NewIgnFile returns a simple ignition3 file from just path and file contents. diff --git a/pkg/daemon/on_disk_validation.go b/pkg/daemon/on_disk_validation.go index b06a344c62..7526b5410a 100644 --- a/pkg/daemon/on_disk_validation.go +++ b/pkg/daemon/on_disk_validation.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "path/filepath" + "strings" ign2types "github.com/coreos/ignition/config/v2_2/types" ign3types "github.com/coreos/ignition/v2/config/v3_4/types" @@ -100,7 +101,12 @@ func checkV3Unit(unit ign3types.Unit, systemdPath string) error { return nil } - return checkFileContentsAndMode(path, []byte(*unit.Contents), defaultFilePermissions) + err := checkFileContentsAndMode(path, []byte(*unit.Contents), defaultFilePermissions) + if err != nil { + return err + } + + return checkUnitEnabled(unit.Name, unit.Enabled) } // checkV3Units validates the contents of all the units in the @@ -230,6 +236,29 @@ func checkV2Files(files []ign2types.File) error { return nil } +// checkUnitEnabled checks whether a systemd unit is enabled as expected. +func checkUnitEnabled(name string, expectedEnabled *bool) error { + if expectedEnabled == nil { + return nil + } + outBytes, _ := runGetOut("systemctl", "is-enabled", name) + out := strings.TrimSpace(string(outBytes)) + + switch { + case *expectedEnabled: + // If expected to be enabled, reject known "not enabled" states + if out == "disabled" || out == "masked" || out == "masked-runtime" || out == "not-found" { + return fmt.Errorf("unit %q expected enabled=true, but systemd reports %q", name, out) + } + case !*expectedEnabled: + // If expected to be disabled, reject any of the enabled-like states + if out == "enabled" || out == "enabled-runtime" { + return fmt.Errorf("unit %q expected enabled=false, but systemd reports %q", name, out) + } + } + return nil +} + // checkFileContentsAndMode reads the file from the filepath and compares its // contents and mode with the expectedContent and mode parameters. It logs an // error in case of an error or mismatch and returns the status of the diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index b5b0c300d8..e1d590c0ae 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -12,6 +12,7 @@ import ( "path/filepath" "reflect" goruntime "runtime" + "slices" "strconv" "strings" "sync" @@ -990,14 +991,22 @@ func (dn *Daemon) updateOnClusterLayering(oldConfig, newConfig *mcfgv1.MachineCo } } + // Get the added and updated units + unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig) + addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated) + + // Check for forcefile before calculatePostConfigChange* functions delete it. + // This is needed for updateFiles to know whether to write all units (OCPBUGS-74692). + forceFilePresent := forceFileExists() + // update files on disk that need updating - if err := dn.updateFiles(oldIgnConfig, newIgnConfig, skipCertificateWrite); err != nil { + if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite, forceFilePresent); err != nil { return err } defer func() { if retErr != nil { - if err := dn.updateFiles(newIgnConfig, oldIgnConfig, skipCertificateWrite); err != nil { + if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite, forceFilePresent); err != nil { errs := kubeErrs.NewAggregate([]error{err, retErr}) retErr = fmt.Errorf("error rolling back files writes: %w", errs) return @@ -1211,7 +1220,14 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi logSystem("Starting update from %s to %s: %+v", oldConfigName, newConfigName, diff) diffFileSet := ctrlcommon.CalculateConfigFileDiffs(&oldIgnConfig, &newIgnConfig) - diffUnitSet := ctrlcommon.CalculateConfigUnitDiffs(&oldIgnConfig, &newIgnConfig) + // Get the added and updated units + unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig) + addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated) + // Get the names of all units changed in some way (added, removed, or updated) + var allChangedUnitNames []string + for _, unit := range append(addedOrChangedUnits, unitDiff.Removed...) { + allChangedUnitNames = append(allChangedUnitNames, unit.Name) + } var fg featuregates.FeatureGate @@ -1225,11 +1241,15 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi } } + // Check for forcefile before calculatePostConfigChange* functions delete it. + // This is needed for updateFiles to know whether to write all units (OCPBUGS-74692). + forceFilePresent := forceFileExists() + var nodeDisruptionActions []opv1.NodeDisruptionPolicyStatusAction var actions []string // If FeatureGateNodeDisruptionPolicy is set, calculate NodeDisruptionPolicy based actions for this MC diff if fg != nil && fg.Enabled(features.FeatureGateNodeDisruptionPolicy) { - nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, diffUnitSet) + nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, allChangedUnitNames) } else { actions, err = calculatePostConfigChangeAction(diff, diffFileSet) } @@ -1342,13 +1362,13 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi } // update files on disk that need updating - if err := dn.updateFiles(oldIgnConfig, newIgnConfig, skipCertificateWrite); err != nil { + if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, skipCertificateWrite, forceFilePresent); err != nil { return err } defer func() { if retErr != nil { - if err := dn.updateFiles(newIgnConfig, oldIgnConfig, skipCertificateWrite); err != nil { + if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, skipCertificateWrite, false); err != nil { errs := kubeErrs.NewAggregate([]error{err, retErr}) retErr = fmt.Errorf("error rolling back files writes: %w", errs) return @@ -1473,15 +1493,21 @@ func (dn *Daemon) updateHypershift(oldConfig, newConfig *mcfgv1.MachineConfig, d return fmt.Errorf("parsing new Ignition config failed: %w", err) } + unitDiff := ctrlcommon.GetChangedConfigUnitsByType(&oldIgnConfig, &newIgnConfig) + addedOrChangedUnits := slices.Concat(unitDiff.Added, unitDiff.Updated) + + // Check for forcefile to support config drift recovery (OCPBUGS-74692) + forceFilePresent := forceFileExists() + // update files on disk that need updating // We should't skip the certificate write in HyperShift since it does not run the extra daemon process - if err := dn.updateFiles(oldIgnConfig, newIgnConfig, false); err != nil { + if err := dn.updateFiles(oldIgnConfig, newIgnConfig, addedOrChangedUnits, false, forceFilePresent); err != nil { return err } defer func() { if retErr != nil { - if err := dn.updateFiles(newIgnConfig, oldIgnConfig, false); err != nil { + if err := dn.updateFiles(newIgnConfig, oldIgnConfig, addedOrChangedUnits, false, false); err != nil { errs := kubeErrs.NewAggregate([]error{err, retErr}) retErr = fmt.Errorf("error rolling back files writes: %w", errs) return @@ -1971,12 +1997,26 @@ func (dn *CoreOSDaemon) switchKernel(oldConfig, newConfig *mcfgv1.MachineConfig) // whatever has been written is picked up by the appropriate daemons, if // required. in particular, a daemon-reload and restart for any unit files // touched. -func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, skipCertificateWrite bool) error { +func (dn *Daemon) updateFiles(oldIgnConfig, newIgnConfig ign3types.Config, addedOrChangedUnits []ign3types.Unit, skipCertificateWrite, forceFilePresent bool) error { klog.Info("Updating files") if err := dn.writeFiles(newIgnConfig.Storage.Files, skipCertificateWrite); err != nil { return err } - if err := dn.writeUnits(newIgnConfig.Systemd.Units); err != nil { + + // With OCPBUGS-58023, we updated this flow to only write units that were either added or + // updated. As can be seen in OCPBUGS-74692, this impacted the traditional method to recover + // from config drifts with systemd units. It makes the `touch /run/machine-config-daemon-force` + // command useless since the new flow does not rewrite all files, only the ones that have been + // added or changed with the latest MC. To keep the fix for OCPBUGS-58023 and allow continue + // supporting the traditional config drift recovery for systemd units, all units should be + // written when a forcefile exists. + unitsToWrite := addedOrChangedUnits + if forceFilePresent { + klog.Info("Forcefile exists, writing all units") + unitsToWrite = newIgnConfig.Systemd.Units + } + + if err := dn.writeUnits(unitsToWrite); err != nil { return err } return dn.deleteStaleData(oldIgnConfig, newIgnConfig)