Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 77 additions & 5 deletions test/extended-priv/mco_bootimages.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ import (
e2e "k8s.io/kubernetes/test/e2e/framework"
)

const mapiBaseErrorMessageTemplate = `1 Degraded MAPI MachineSets | 0 Degraded ControlPlaneMachineSets | 0 Degraded CAPI MachineSets | 0 Degraded CAPI MachineDeployments | Error(s):` +
` error syncing MAPI MachineSet %s: failed to reconcile machineset %s, err:`
const mapiBaseErrorMessageTemplate = `1 Degraded MAPI MachineSets | 0 Degraded ControlPlaneMachineSets | 0 Degraded CAPI MachineSets | 0 CAPI MachineDeployments | Error(s):` +
` error syncing MAPI MachineSet %s:`

const mapiReconcileErrorMessageTemplate = mapiBaseErrorMessageTemplate + ` failed to reconcile machineset %s, err:`

var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive][Serial][Disruptive] MCO Bootimages", func() {
defer g.GinkgoRecover()
Expand Down Expand Up @@ -410,7 +412,7 @@ var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive
clonedMSName = fmt.Sprintf("cloned-tc-%s-copy", GetCurrentTestPolarionIDNumber())
clonedSecretName = fmt.Sprintf("cloned-user-data-%s-copy", GetCurrentTestPolarionIDNumber())
// We make the the regexp end in a "$" to make sure that no more versions than the expected ones are present
expectedFailedMessageRegexp = regexp.QuoteMeta(fmt.Sprintf(mapiBaseErrorMessageTemplate+
expectedFailedMessageRegexp = regexp.QuoteMeta(fmt.Sprintf(mapiReconcileErrorMessageTemplate+
` error grabbing user data secret referenced in machineset: secrets "%s" not found`, clonedMSName, clonedMSName, clonedSecretName)) + "$"
)

Expand All @@ -423,7 +425,7 @@ var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive
clonedMSName = fmt.Sprintf("cloned-tc-%s-copy", GetCurrentTestPolarionIDNumber())
clonedSecretName = fmt.Sprintf("cloned-user-data-%s-copy", GetCurrentTestPolarionIDNumber())
// We make the the regexp end in a "$" to make sure that no more versions than the expected ones are present
expectedFailedMessageRegexp = regexp.QuoteMeta(fmt.Sprintf(mapiBaseErrorMessageTemplate+
expectedFailedMessageRegexp = regexp.QuoteMeta(fmt.Sprintf(mapiReconcileErrorMessageTemplate+
" failed to unmarshal decoded user-data to json (secret %s): invalid character 'h' in literal true (expecting 'r')t", clonedMSName, clonedMSName, clonedSecretName)) + "$"
)

Expand All @@ -441,7 +443,7 @@ var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive
clonedMSName = fmt.Sprintf("cloned-tc-%s-copy", GetCurrentTestPolarionIDNumber())
clonedSecretName = fmt.Sprintf("cloned-user-data-%s-copy", GetCurrentTestPolarionIDNumber())
// We make the the regexp end in a "$" to make sure that no more versions than the expected ones are present
expectedFailedMessageRegexp = regexp.QuoteMeta(fmt.Sprintf(mapiBaseErrorMessageTemplate+
expectedFailedMessageRegexp = regexp.QuoteMeta(fmt.Sprintf(mapiReconcileErrorMessageTemplate+
" converting ignition stub failed: failed to parse Ignition config: parsing Ignition config failed:"+
" unknown version. Supported spec versions: 2.2,3.0,3.1,3.2,3.3,3.4,3.5", clonedMSName, clonedMSName)) + "$"
)
Expand Down Expand Up @@ -679,6 +681,76 @@ var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive
exutil.By("Set the original architecture in the cloneed machineset")
setArchitectureAndCheckStatus(clonedMS, machineConfiguration, arch.String())
})

g.It("[PolarionID:89596][OTP] ManagedBootImages. Hot loop detection and recovery", g.Label("Platform:aws", "Platform:gce", "Platform:vsphere", "Platform:azure"), func() {
var (
clonedMSName = fmt.Sprintf("cloned-tc-%s-%s", GetCurrentTestPolarionIDNumber(), exutil.GetRandomString()[:5])
machineSet = NewMachineSetList(oc.AsAdmin(), MachineAPINamespace).GetAllOrFail()[0]
fakeImageName = getBackdatedBootImage(oc.AsAdmin())
expectedHotLoopMessageRegex = regexp.QuoteMeta(fmt.Sprintf(mapiBaseErrorMessageTemplate+
" refusing to reconcile machineset %s, hot loop detected.", clonedMSName, clonedMSName))
)

exutil.By("Opt-in boot images update")
o.Expect(
machineConfiguration.SetAllManagedBootImagesConfig(MachineSetResource),
).To(o.Succeed(), "Error configuring ALL managedBootImages in the 'cluster' MachineConfiguration resource")
logger.Infof("OK!\n")

exutil.By("Clone the first machineset")
clonedMS, err := machineSet.Duplicate(clonedMSName)
o.Expect(err).NotTo(o.HaveOccurred(), "Error duplicating %s", machineSet)
defer clonedMS.Delete()

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Handle deferred cleanup errors explicitly.

defer clonedMS.Delete() ignores the returned error (Line 703). In this disruptive test, failed cleanup can leak resources and cause cross-test instability.

Suggested fix
-		defer clonedMS.Delete()
+		defer func() {
+			o.Expect(clonedMS.Delete()).To(o.Succeed(), "Failed to delete cloned machineset %s", clonedMSName)
+		}()

As per coding guidelines, Go code should never ignore error returns.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
defer clonedMS.Delete()
defer func() {
o.Expect(clonedMS.Delete()).To(o.Succeed(), "Failed to delete cloned machineset %s", clonedMSName)
}()
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/extended-priv/mco_bootimages.go` at line 703, The `defer
clonedMS.Delete()` statement ignores the error returned by the Delete() method,
which violates Go coding guidelines and can cause resource leaks in this
disruptive test. Modify the deferred cleanup to capture and explicitly handle
the error returned by clonedMS.Delete() - this typically involves logging the
error or checking it to ensure cleanup failures are not silently ignored and can
be traced during test debugging.

Source: Coding guidelines

logger.Infof("OK!\n")

exutil.By("Trigger hot loop by repeatedly setting a wrong boot image")
// HotLoopLimit is 3. After 3 patches to the same target value by MCO,
// the next attempt to patch triggers the hot loop detection.
for i := 1; i <= 3; i++ {
logger.Infof("Hot loop iteration %d/3: setting wrong boot image", i)
o.Expect(clonedMS.SetCoreOsBootImage(fakeImageName)).To(o.Succeed(),
"Error setting a fake boot image in %s (iteration %d)", clonedMS, i)

logger.Infof("Waiting for MCO to fix the boot image (iteration %d)", i)
o.Eventually(clonedMS.GetCoreOsBootImage, "5m", "20s").ShouldNot(o.Equal(fakeImageName),
"MCO did not fix the boot image in %s (iteration %d)", clonedMS, i)
}
logger.Infof("OK!\n")

exutil.By("Set wrong boot image one more time to trigger the hot loop error")
o.Expect(clonedMS.SetCoreOsBootImage(fakeImageName)).To(o.Succeed(),
"Error setting a fake boot image in %s", clonedMS)
logger.Infof("OK!\n")

exutil.By("Check that the hot loop error is reported in the MachineConfiguration resource")
o.Eventually(machineConfiguration, "5m", "20s").Should(HaveConditionField("BootImageUpdateDegraded", "status", "True"),
"Expected %s to be BootImageUpdateDegraded due to hot loop.\n%s", machineConfiguration.PrettyString())

o.Eventually(machineConfiguration, "5m", "20s").Should(HaveConditionField("BootImageUpdateDegraded", "message", o.MatchRegexp(expectedHotLoopMessageRegex)),
"Expected hot loop error message in %s.\n%s", machineConfiguration.PrettyString())
Comment on lines +727 to +730

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Fix assertion format strings with missing arguments.

These failure messages use two %s placeholders but pass one argument, so diagnostics will contain %!s(MISSING) and be harder to debug.

Suggested fix
-		o.Eventually(machineConfiguration, "5m", "20s").Should(HaveConditionField("BootImageUpdateDegraded", "status", "True"),
-			"Expected %s to be BootImageUpdateDegraded due to hot loop.\n%s", machineConfiguration.PrettyString())
+		o.Eventually(machineConfiguration, "5m", "20s").Should(HaveConditionField("BootImageUpdateDegraded", "status", "True"),
+			"Expected BootImageUpdateDegraded=True due to hot loop.\n%s", machineConfiguration.PrettyString())

-		o.Eventually(machineConfiguration, "5m", "20s").Should(HaveConditionField("BootImageUpdateDegraded", "message", o.MatchRegexp(expectedHotLoopMessageRegex)),
-			"Expected hot loop error message in %s.\n%s", machineConfiguration.PrettyString())
+		o.Eventually(machineConfiguration, "5m", "20s").Should(HaveConditionField("BootImageUpdateDegraded", "message", o.MatchRegexp(expectedHotLoopMessageRegex)),
+			"Expected hot loop error message.\n%s", machineConfiguration.PrettyString())

-		o.Eventually(machineConfiguration, "5m", "20s").Should(HaveConditionField("BootImageUpdateDegraded", "status", "False"),
-			"Expected %s NOT to be BootImageUpdateDegraded after clearing hot loop.\n%s", machineConfiguration.PrettyString())
+		o.Eventually(machineConfiguration, "5m", "20s").Should(HaveConditionField("BootImageUpdateDegraded", "status", "False"),
+			"Expected BootImageUpdateDegraded=False after clearing hot loop.\n%s", machineConfiguration.PrettyString())

Also applies to: 746-747

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/extended-priv/mco_bootimages.go` around lines 727 - 730, The failure
message format strings in the assertion calls contain two %s placeholders each,
but only one argument (machineConfiguration.PrettyString()) is provided to each.
This causes Go's formatter to output %!s(MISSING) in the diagnostics, making
debugging harder. Fix both assertions by either removing the second %s
placeholder from each format string or providing an appropriate second argument
to match the number of placeholders. This issue appears in the block around
lines 727-730 and also applies to similar assertions around lines 746-747.

logger.Infof("OK!\n")

exutil.By("Opt-out to clear the hot loop state")
o.Expect(
machineConfiguration.SetNoneManagedBootImagesConfig(MachineSetResource),
).To(o.Succeed(), "Error configuring None managedBootImages in the 'cluster' MachineConfiguration resource")
logger.Infof("OK!\n")

exutil.By("Opt-in again to verify the hot loop state was cleared")
o.Expect(
machineConfiguration.SetAllManagedBootImagesConfig(MachineSetResource),
).To(o.Succeed(), "Error configuring ALL managedBootImages in the 'cluster' MachineConfiguration resource")
logger.Infof("OK!\n")

exutil.By("Check that the degraded status is cleared and the boot image is restored")
o.Eventually(machineConfiguration, "5m", "20s").Should(HaveConditionField("BootImageUpdateDegraded", "status", "False"),
"Expected %s NOT to be BootImageUpdateDegraded after clearing hot loop.\n%s", machineConfiguration.PrettyString())

o.Eventually(clonedMS.GetCoreOsBootImage, "5m", "20s").ShouldNot(o.Equal(fakeImageName),
"MCO should have fixed the boot image in %s after clearing the hot loop", clonedMS)
CheckCurrentOSImageIsUpdated(clonedMS)
logger.Infof("OK!\n")
})
})

func DuplicateMachineSetWithCustomBootImage(ms *MachineSet, newBootImage, newName string) (*MachineSet, error) {
Expand Down