Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/console/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ type consoleOperator struct {
trackables trackables

monitoringDeploymentLister appsv1listers.DeploymentLister

lastDeploymentAvailableTime time.Time
}

type trackables struct {
Expand Down
35 changes: 28 additions & 7 deletions pkg/console/operator/sync_v400.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"slices"
"sort"
"strings"
"time"

// kube
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -46,6 +47,13 @@ import (
telemetry "github.com/openshift/console-operator/pkg/console/telemetry"
)

// deploymentAvailableGracePeriod is the duration the operator tolerates zero
// available replicas before reporting DeploymentAvailable=False. This absorbs
// brief disruptions (e.g. node reboots during conformance-serial tests) that
// take all replicas offline for ~10 seconds. Matches the Degraded inertia
// duration in library-go.
//
// The window is measured from the last sync in which the deployment was
// observed available (see evaluateDeploymentAvailability), not from the
// moment the replicas actually went away.
//
// NOTE(review): the library-go inertia value is not visible from this file;
// confirm the two durations are meant to stay in sync if either one changes.
const deploymentAvailableGracePeriod = 2 * time.Minute

// The sync loop starts from zero and works its way through the requirements for a running console.
// If at any point something is missing, it creates/updates that piece and immediately dies.
// The next loop will pick up where the previous one left off and move the process forward one step.
Expand Down Expand Up @@ -221,13 +229,7 @@ func (co *consoleOperator) sync_v400(ctx context.Context, controllerContext fact
return nil
}()))

statusHandler.AddCondition(status.HandleAvailable(func() (prefix string, reason string, err error) {
prefix = "Deployment"
if !deploymentsub.IsAvailable(actualDeployment) {
return prefix, "InsufficientReplicas", fmt.Errorf("%v replicas available for console deployment", actualDeployment.Status.ReadyReplicas)
}
return prefix, "", nil
}()))
statusHandler.AddCondition(status.HandleAvailable(co.evaluateDeploymentAvailability(actualDeployment)))

// if we survive the gauntlet, we need to update the console config with the
// public hostname so that the world can know the console is ready to roll
Expand Down Expand Up @@ -338,6 +340,25 @@ func (co *consoleOperator) SyncDeployment(
return deployment, "", nil
}

// evaluateDeploymentAvailability decides what the "Deployment" Available
// condition should report for the given console deployment.
//
// The returned prefix is always "Deployment". The remaining results are:
//   - deployment is available: the current time is stored in
//     co.lastDeploymentAvailableTime and ("", nil) is returned.
//   - deployment is not available, but it was seen available no longer than
//     deploymentAvailableGracePeriod ago: the outage is treated as a transient
//     blip (e.g. a node reboot), a V(4) log line is written, and ("", nil) is
//     returned so Available=False is suppressed.
//   - otherwise (never seen available, or the grace period has run out):
//     reason "InsufficientReplicas" and an error quoting the deployment's
//     ReadyReplicas count are returned.
//
// The last-available timestamp lives only on the operator struct, so a freshly
// constructed operator has a zero timestamp and gets no grace period.
func (co *consoleOperator) evaluateDeploymentAvailability(deployment *appsv1.Deployment) (prefix string, reason string, err error) {
	const conditionPrefix = "Deployment"

	if deploymentsub.IsAvailable(deployment) {
		// Remember when we last saw a healthy deployment; the grace period
		// is measured from this instant.
		co.lastDeploymentAvailableTime = time.Now()
		return conditionPrefix, "", nil
	}

	lastSeen := co.lastDeploymentAvailableTime
	elapsed := time.Since(lastSeen)

	// A zero timestamp means availability was never observed, so there is
	// nothing to be graceful about.
	withinGrace := !lastSeen.IsZero() && elapsed <= deploymentAvailableGracePeriod
	if !withinGrace {
		return conditionPrefix, "InsufficientReplicas", fmt.Errorf("%v replicas available for console deployment", deployment.Status.ReadyReplicas)
	}

	klog.V(4).Infof("deployment has 0 available replicas but was available %v ago, within %v grace period",
		elapsed, deploymentAvailableGracePeriod)
	return conditionPrefix, "", nil
}

// apply configmap (needs route)
// by the time we get to the configmap, we can assume the route exists & is configured properly
// therefore no additional error handling is needed here.
Expand Down
140 changes: 140 additions & 0 deletions pkg/console/operator/sync_v400_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"sort"
"testing"
"time"

"github.com/go-test/deep"

Expand Down Expand Up @@ -539,3 +540,142 @@ func TestGetTelemetryConfiguration_DisconnectedClusterNoError(t *testing.T) {
t.Error("expected ACCOUNT_MAIL key to be present even on disconnected cluster")
}
}

// TestEvaluateDeploymentAvailability covers the grace-period behaviour of
// evaluateDeploymentAvailability (OCPBUGS-67134): a deployment that briefly
// drops to zero replicas must not flip Available to False, while a deployment
// that was never available, or has been down longer than
// deploymentAvailableGracePeriod, must.
func TestEvaluateDeploymentAvailability(t *testing.T) {
	// newDeployment returns a console deployment whose AvailableReplicas and
	// ReadyReplicas are both set to replicas.
	newDeployment := func(replicas int32) *appsv1.Deployment {
		meta := metav1.ObjectMeta{
			Name:      "console",
			Namespace: "openshift-console",
		}
		status := appsv1.DeploymentStatus{
			AvailableReplicas: replicas,
			ReadyReplicas:     replicas,
		}
		return &appsv1.Deployment{ObjectMeta: meta, Status: status}
	}

	t.Run("available deployment reports Available=True and updates timestamp", func(t *testing.T) {
		op := &consoleOperator{}

		prefix, reason, err := op.evaluateDeploymentAvailability(newDeployment(2))

		if err != nil {
			t.Errorf("expected no error, got: %v", err)
		}
		if reason != "" {
			t.Errorf("expected empty reason, got: %q", reason)
		}
		if prefix != "Deployment" {
			t.Errorf("expected prefix 'Deployment', got: %q", prefix)
		}
		if op.lastDeploymentAvailableTime.IsZero() {
			t.Error("expected lastDeploymentAvailableTime to be set")
		}
	})

	t.Run("unavailable with no prior availability reports Available=False immediately", func(t *testing.T) {
		// Zero-valued operator: availability has never been observed.
		op := &consoleOperator{}

		_, reason, err := op.evaluateDeploymentAvailability(newDeployment(0))

		if err == nil {
			t.Error("expected error, got nil")
		}
		if reason != "InsufficientReplicas" {
			t.Errorf("expected reason 'InsufficientReplicas', got: %q", reason)
		}
	})

	t.Run("unavailable within grace period reports Available=True (suppressed)", func(t *testing.T) {
		tenSecondsAgo := time.Now().Add(-10 * time.Second)
		op := &consoleOperator{lastDeploymentAvailableTime: tenSecondsAgo}

		_, reason, err := op.evaluateDeploymentAvailability(newDeployment(0))

		if err != nil {
			t.Errorf("expected no error within grace period, got: %v", err)
		}
		if reason != "" {
			t.Errorf("expected empty reason within grace period, got: %q", reason)
		}
	})

	t.Run("unavailable beyond grace period reports Available=False", func(t *testing.T) {
		threeMinutesAgo := time.Now().Add(-3 * time.Minute)
		op := &consoleOperator{lastDeploymentAvailableTime: threeMinutesAgo}

		_, reason, err := op.evaluateDeploymentAvailability(newDeployment(0))

		if err == nil {
			t.Error("expected error beyond grace period, got nil")
		}
		if reason != "InsufficientReplicas" {
			t.Errorf("expected reason 'InsufficientReplicas', got: %q", reason)
		}
	})

	t.Run("recovery after blip resets timestamp", func(t *testing.T) {
		op := &consoleOperator{
			lastDeploymentAvailableTime: time.Now().Add(-30 * time.Second),
		}

		// Blip: previously available, now at zero replicas but still inside
		// the grace period.
		if _, _, err := op.evaluateDeploymentAvailability(newDeployment(0)); err != nil {
			t.Error("expected suppression within grace period")
		}

		// Recovery: replicas are back, so the timestamp must move forward.
		previous := op.lastDeploymentAvailableTime
		if _, _, err := op.evaluateDeploymentAvailability(newDeployment(2)); err != nil {
			t.Errorf("expected no error on recovery, got: %v", err)
		}
		if !op.lastDeploymentAvailableTime.After(previous) {
			t.Error("expected lastDeploymentAvailableTime to be updated on recovery")
		}
	})

	t.Run("unavailable just inside grace period boundary reports Available=True", func(t *testing.T) {
		// One second short of the full grace period.
		justInside := time.Now().Add(time.Second - deploymentAvailableGracePeriod)
		op := &consoleOperator{lastDeploymentAvailableTime: justInside}

		_, reason, err := op.evaluateDeploymentAvailability(newDeployment(0))

		if err != nil {
			t.Errorf("expected no error just inside grace period, got: %v", err)
		}
		if reason != "" {
			t.Errorf("expected empty reason just inside grace period, got: %q", reason)
		}
	})

	t.Run("error message includes ready replica count", func(t *testing.T) {
		op := &consoleOperator{}

		_, _, err := op.evaluateDeploymentAvailability(newDeployment(0))
		if err == nil {
			t.Fatal("expected error, got nil")
		}

		const want = "0 replicas available for console deployment"
		if got := err.Error(); got != want {
			t.Errorf("expected error message %q, got %q", want, got)
		}
	})
}