Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion pkg/controller/node/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,17 @@ func (ctrl *Controller) isMaster(node *corev1.Node) bool {
// Given a master Node, ensure it reflects the current mastersSchedulable
// setting and make sure the control-plane label is set.
func (ctrl *Controller) reconcileMaster(node *corev1.Node) {
err := ctrl.updateMasterNodeControlPlaneLabel(node)
cc, err := ctrl.ccLister.Get(ctrlcommon.ControllerConfigName)
if err != nil {
klog.Errorf("failed to get controller config for master reconciliation: %v", err)
return
}
if cc.Spec.Infra.Status.ControlPlaneTopology == configv1.ExternalTopologyMode {
// HyperShift hosted clusters have no in-cluster control plane nodes.
return
}

err = ctrl.updateMasterNodeControlPlaneLabel(node)
if err != nil {
err = fmt.Errorf("failed adding the control-plane label to master Node: %w", err)
klog.Error(err)
Expand Down
57 changes: 57 additions & 0 deletions pkg/controller/node/node_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"reflect"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -1591,6 +1592,62 @@ func TestControlPlaneTopology(t *testing.T) {
f.run(getKey(mcp, t))
}

func TestReconcileMasterSkipsExternalTopology(t *testing.T) {
t.Parallel()

f := newFixture(t)
cc := newControllerConfig(ctrlcommon.ControllerConfigName, configv1.ExternalTopologyMode)
f.ccLister = append(f.ccLister, cc)
f.objects = append(f.objects, cc)

masterNode := newNodeWithLabel("master-node", "", "", map[string]string{
ctrlcommon.MasterLabel: "",
WorkerLabel: "",
})
f.nodeLister = append(f.nodeLister, masterNode)
f.kubeobjects = append(f.kubeobjects, masterNode)

c := f.newController()
c.reconcileMaster(masterNode)

for _, action := range filterInformerActions(f.kubeclient.Actions()) {
if action.GetResource().Resource == "nodes" && (action.GetVerb() == "patch" || action.GetVerb() == "update") {
t.Fatalf("unexpected node %s on External topology cluster: %#v", action.GetVerb(), action)
}
}
}

func TestReconcileMasterAddsControlPlaneLabel(t *testing.T) {
t.Parallel()

f := newFixture(t)
cc := newControllerConfig(ctrlcommon.ControllerConfigName, configv1.HighlyAvailableTopologyMode)
f.ccLister = append(f.ccLister, cc)
f.objects = append(f.objects, cc)

masterNode := newNodeWithLabel("master-node", "", "", map[string]string{
ctrlcommon.MasterLabel: "",
})
f.nodeLister = append(f.nodeLister, masterNode)
f.kubeobjects = append(f.kubeobjects, masterNode)

c := f.newController()
c.reconcileMaster(masterNode)

patched := false
for _, action := range filterInformerActions(f.kubeclient.Actions()) {
patchAction, ok := action.(core.PatchAction)
if !ok || action.GetResource().Resource != "nodes" {
continue
}
if strings.Contains(string(patchAction.GetPatch()), ControlPlaneLabel) {
patched = true
break
}
}
require.True(t, patched, "control-plane label should be added on non-External topology clusters")
}

// Checks that the layered pool can / should continue. This is based upon the
// results of querying the API for the MachineOSConfig and MachineOSBuild.
func TestCanLayeredPoolContinue(t *testing.T) {
Expand Down
100 changes: 100 additions & 0 deletions test/extended-priv/mco_ocb.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,106 @@ var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive
logger.Infof("OK!\n")
})

g.It("[PolarionID:88202][Skipped:Disconnected] Both off-cluster and on-cluster layering can coexist on the same pool [Disruptive]", func() {
var (
testID = GetCurrentTestPolarionIDNumber()
osLayerMCName = fmt.Sprintf("tc-%s-os-layer", testID)
offClusterFile = "/etc/test-offlayering.test"
onClusterFile = "/etc/test-onlayering.test"
containerFiles = []ContainerFile{{Content: fmt.Sprintf("RUN echo \"on-cluster layering %s\" > %s", testID, onClusterFile)}}
)

mcp := GetCompactCompatiblePool(oc.AsAdmin())
node := mcp.GetSortedNodesOrFail()[0]
offClusterRF := NewRemoteFile(node, offClusterFile)
onClusterRF := NewRemoteFile(node, onClusterFile)

exutil.By("Build a custom OS image for off-cluster layering")
osImageBuilder := OsImageBuilderInNode{
node: node,
dockerFileCommands: fmt.Sprintf("RUN echo \"off-cluster layering %s\" > %s\nRUN ostree container commit", testID, offClusterFile),
}
digestedImage, err := osImageBuilder.CreateAndDigestOsImage()
o.Expect(err).NotTo(o.HaveOccurred(), "Error creating the custom osImage for off-cluster layering")
logger.Infof("Built custom OS image: %s", digestedImage)
logger.Infof("OK!\n")

exutil.By("Apply osImageURL MachineConfig to pool (off-cluster layering)")
osLayerMC := NewMachineConfig(oc.AsAdmin(), osLayerMCName, mcp.GetName())
osLayerMC.parameters = []string{"OS_IMAGE=" + digestedImage}
defer osLayerMC.DeleteWithWait()
osLayerMC.create()
logger.Infof("MachineConfig %s created with osImageURL %s", osLayerMC.GetName(), digestedImage)
logger.Infof("OK!\n")

exutil.By("Verify off-cluster layering is applied on the node")
o.Expect(node.GetCurrentBootOSImage()).To(o.Equal(digestedImage),
"Node %s should be running the off-cluster osImageURL %s", node, digestedImage)
o.Expect(offClusterRF.Exists()).To(o.BeTrue(),
"%s should exist on %s after off-cluster layering", offClusterFile, node)
logger.Infof("Off-cluster layering verified: %s exists on %s", offClusterFile, node)
logger.Infof("OK!\n")

exutil.By("Enable on-cluster layering (OCL) with a containerFile")
mosc, err := CreateMachineOSConfigUsingExternalOrInternalRegistry(oc.AsAdmin(), MachineConfigNamespace, mcp.GetName(), mcp.GetName(), containerFiles)
o.Expect(err).NotTo(o.HaveOccurred(), "Error creating MachineOSConfig for %s", mcp.GetName())
defer DisableOCL(mosc)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify the ignored error return and the function signature
rg -n 'defer\s+DisableOCL\(' test/extended-priv/mco_ocb.go
rg -n '^func\s+DisableOCL\(.+\)\s+error' test/extended-priv/mco_ocb.go

Repository: openshift/machine-config-operator

Length of output: 243


🏁 Script executed:

# Get context on which lines are new in the PR by checking the ranges
# The review mentions lines 238-337 as new test code
# Check if lines 176, 198 are before the new block and 382 is after
sed -n '170,390p' test/extended-priv/mco_ocb.go | cat -n | grep -E '(176|198|281|382):|defer.*DisableOCL'

Repository: openshift/machine-config-operator

Length of output: 204


🏁 Script executed:

# Also check how errors are handled with other deferred calls in the file to understand the pattern
rg -B2 -A2 'defer.*\(' test/extended-priv/mco_ocb.go | head -60

Repository: openshift/machine-config-operator

Length of output: 3051


Handle deferred DisableOCL errors instead of dropping them.

Line 281 defers DisableOCL(mosc) but ignores its error return. If cleanup fails, the spec can leak OCL state and destabilize later tests. Assert this deferred cleanup result explicitly.

Proposed fix
-		defer DisableOCL(mosc)
+		defer func() {
+			o.Expect(DisableOCL(mosc)).To(o.Succeed(), "failed to disable OCL for pool %s", mcp.GetName())
+		}()

Go guideline: **/*.go — "Never ignore error returns".

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
defer DisableOCL(mosc)
defer func() {
o.Expect(DisableOCL(mosc)).To(o.Succeed(), "failed to disable OCL for pool %s", mcp.GetName())
}()
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@test/extended-priv/mco_ocb.go` at line 281, The deferred call to
DisableOCL(mosc) on line 281 is ignoring the error return value that the
function provides. Replace the simple defer statement with a deferred anonymous
function that explicitly captures and handles the error returned by
DisableOCL(mosc) by either logging it, storing it for assertion, or using an
error handling pattern consistent with the test's cleanup strategy, ensuring
that any cleanup failures are properly tracked rather than silently dropped.

Source: Coding guidelines

logger.Infof("OK!\n")

exutil.By("Validate OCL build succeeds")
ValidateSuccessfulMOSC(mosc, nil)
logger.Infof("OK!\n")

exutil.By("Verify both off-cluster and on-cluster layering files are present")
o.Expect(offClusterRF.Exists()).To(o.BeTrue(),
"%s should still exist alongside on-cluster layering", offClusterFile)
o.Expect(onClusterRF.Exists()).To(o.BeTrue(),
"%s should exist after on-cluster layering is applied", onClusterFile)
logger.Infof("Both %s and %s are present on %s", offClusterFile, onClusterFile, node)
logger.Infof("OK!\n")

exutil.By("Record current MOSB before deleting off-cluster MC")
currentMOSB, err := mosc.GetCurrentMachineOSBuild()
o.Expect(err).NotTo(o.HaveOccurred(), "Error getting current MOSB")
currentMOSBName := currentMOSB.GetName()
logger.Infof("Current MOSB before MC deletion: %s", currentMOSBName)
logger.Infof("OK!\n")

exutil.By("Delete the off-cluster MC")
o.Expect(osLayerMC.Delete()).To(o.Succeed(),
"Error deleting off-cluster MachineConfig %s", osLayerMCName)
logger.Infof("Off-cluster MachineConfig %s deleted", osLayerMCName)
logger.Infof("OK!\n")

exutil.By("Wait for a new MachineOSBuild to be triggered after MC deletion")
var newMOSB *MachineOSBuild
o.Eventually(func() string {
mosb, err := mosc.GetCurrentMachineOSBuild()
if err != nil {
return currentMOSBName
}
newMOSB = mosb
return mosb.GetName()
}, "5m", "20s").ShouldNot(o.Equal(currentMOSBName),
"A new MOSB should be created after deleting the off-cluster MC")
logger.Infof("OK!\n")

exutil.By("Wait for the new build to succeed and deploy")
o.Eventually(newMOSB, "20m", "20s").Should(HaveConditionField("Succeeded", "status", TrueString),
"New MachineOSBuild didn't succeed after MC deletion")
logger.Infof("New MOSB %s built successfully", newMOSB.GetName())
mcp.waitForComplete()
logger.Infof("OK!\n")

exutil.By("Verify only on-cluster layering content remains after MC deletion")
o.Expect(offClusterRF.Exists()).To(o.BeFalse(),
"%s should be removed after off-cluster MC is deleted", offClusterFile)
o.Expect(onClusterRF.Exists()).To(o.BeTrue(),
"%s should still exist after off-cluster MC is deleted", onClusterFile)
logger.Infof("Only on-cluster layering file remains on %s (as expected)", node)
logger.Infof("OK!\n")
})

})

func testContainerFile(containerFiles []ContainerFile, imageNamespace string, mcp *MachineConfigPool, checkers []Checker, defaultPullSecret bool) {
Expand Down