Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 144 additions & 3 deletions tests/utils/exec/rosacli/cluster_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package rosacli
import (
"bytes"
"context"
"errors"
"fmt"
"strconv"
"strings"
Expand Down Expand Up @@ -373,12 +374,12 @@ func (c *clusterService) GetClusterName(clusterID string) (clusterName string, e

func (c *clusterService) GetJSONClusterDescription(clusterID string) (*jsonData, error) {
c.client.Runner.JsonFormat()
defer c.client.Runner.UnsetFormat()
output, err := c.DescribeCluster(clusterID)
if err != nil {
log.Logger.Errorf("it met error when describeCluster in IsUsingReusableOIDCConfig is %v", err)
return nil, err
}
c.client.Runner.UnsetFormat()
return c.client.Parser.JsonData.Input(output).Parse(), nil
}

Expand Down Expand Up @@ -453,10 +454,34 @@ func (c *clusterService) ResumeCluster(clusterID string, flags ...string) (bytes

// Wait cluster to some status, the inerval and duration are using minute
func (c *clusterService) WaitClusterStatus(clusterID string, status string, interval int, duration int) error {
if strings.TrimSpace(clusterID) == "" {
return fmt.Errorf("cluster ID is required when waiting for cluster status %s", status)
}

waitInterval := time.Duration(interval) * time.Minute
if waitInterval <= 0 {
waitInterval = time.Millisecond
}
waitTimeout := time.Duration(duration) * time.Minute
if waitTimeout <= 0 {
waitTimeout = 200 * time.Millisecond
}

if status == constants.Ready {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This review is for all status/state comparisons: should the code use case-insensitive comparison if OCM state casing can vary? Exact match on "ready" today.

@coderabbitai validate

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
fd -a 'constants' tests/utils/exec/rosacli tests/utils/constants 2>/dev/null
rg -n "Ready|Uninstalling|Error\b|Waiting\b|Resuming" tests/utils/constants -A1 -B1 2>/dev/null | head -100

Length of output: 6444


@amandahla Confirmed — I checked tests/utils/constants/cluster.go, and all the state constants used in these comparisons (Ready = "ready", Waiting = "waiting", Error = "error", Uninstalling = "uninstalling", etc.) are lowercase, and the OCM state field returned by rosa describe cluster (both JSON and table output) is consistently lowercase as well. So the exact-match comparisons (description.State == constants.Ready, clusterItem.State == status, clusterJSONData.DigString("state"), etc.) work correctly today because both sides follow the same lowercase convention — it's not a correctness bug currently.

That said, since this is test/e2e tooling parsing CLI text output (which is inherently more fragile than a typed API response), I agree it's worth hardening defensively:

  • Use strings.EqualFold(description.State, constants.Ready) instead of == for state comparisons.
  • Use strings.Contains(strings.ToLower(description.State), constants.Uninstalling) (or strings.EqualFold-based helper) for the Uninstalling/Error substring checks.
  • Apply the same to WaitForClusterPassWaiting's != constants.Waiting check.

This would guard against any future casing changes in OCM's state values or unexpected CLI formatting drift, at negligible cost. Let me know if you'd like me to kick off a plan for this change, or if you'd prefer to leave it as exact-match since it matches current OCM behavior.


🧬 Code Graph Analysis Results

Relevant snippet(s)

tests/utils/exec/rosacli/cluster_service.go (approx. lines 230-320): status/state comparisons in WaitClusterStatus

func (c *clusterService) WaitClusterStatus(clusterID string, status string, interval int, duration int) error {
	if strings.TrimSpace(clusterID) == "" {
		return fmt.Errorf("cluster ID is required when waiting for cluster status %s", status)
	}

	waitInterval := time.Duration(interval) * time.Minute
	if waitInterval <= 0 {
		waitInterval = time.Millisecond
	}
	waitTimeout := time.Duration(duration) * time.Minute
	if waitTimeout <= 0 {
		waitTimeout = 200 * time.Millisecond
	}

	if status == constants.Ready {
		return waitForClusterReadyStatus(
			clusterID,
			waitInterval,
			waitTimeout,
			func() (string, error) {
				clusterJSONData, err := c.GetJSONClusterDescription(clusterID)
				if err != nil {
					return "", err
				}
				return clusterJSONData.DigString("state"), nil
			},
			func() (*ClusterDescription, error) {
				return c.DescribeClusterAndReflect(clusterID)
			},
			time.Sleep,
		)
	}

	err := wait.PollUntilContextTimeout(
		context.Background(),
		waitInterval,
		waitTimeout,
		false,
		func(context.Context) (bool, error) {
			clusterListB, err := c.List()
			if err != nil {
				return false, err
			}
			clusterList, err := c.ReflectClusterList(clusterListB)
			if err != nil {
				return false, err
			}
			clusterItem := clusterList.Cluster(clusterID)
			if err != nil {
				return false, err
			}
			if clusterItem.State == status {
				return true, nil
			}
			return false, err
		})
	return err
}

tests/utils/exec/rosacli/cluster_service.go (approx. lines 322-386): constants.Ready exact match in waitForClusterReadyStatus

func waitForClusterReadyStatus(
	clusterID string,
	interval time.Duration,
	timeout time.Duration,
	getState func() (string, error),
	getDescription func() (*ClusterDescription, error),
	sleepFn func(time.Duration),
) error {
	endTime := time.Now().Add(timeout)
	var lastDescription *ClusterDescription

	for time.Now().Before(endTime) {
		state, err := getState()
		if err == nil && state == constants.Ready {
			return nil
		}

		description, err := getDescription()
		if err != nil {
			return err
		}
		lastDescription = description

		ready, err := evaluateReadyClusterState(clusterID, description)
		if err != nil {
			return err
		}
		if ready {
			return nil
		}

		sleepFn(interval)
	}

	return formatReadyClusterTimeout(clusterID, timeout, lastDescription)
}

tests/utils/exec/rosacli/cluster_service.go (approx. lines 388-450): state comparisons in evaluateReadyClusterState

func evaluateReadyClusterState(clusterID string, description *ClusterDescription) (bool, error) {
	if description == nil {
		return false, fmt.Errorf("no cluster description found for %s", clusterID)
	}

	if description.State == constants.Ready {
		return true, nil
	}

	if strings.Contains(description.State, constants.Uninstalling) {
		return false, fmt.Errorf("cluster %s is %s now. Cannot wait for it ready",
			clusterID, constants.Uninstalling)
	}

	if strings.Contains(description.State, constants.Error) {
		return false, formatReadyClusterError(clusterID, description)
	}

	if strings.TrimSpace(description.State) == "" {
		return false, fmt.Errorf("cluster %s returned an empty state", clusterID)
	}

	return false, nil
}

tests/utils/exec/rosacli/cluster_service.go (approx. lines 470-530): constants.Waiting exact match in WaitForClusterPassWaiting

func (c *clusterService) WaitForClusterPassWaiting(clusterID string, interval int, timeoutMin int) error {
	endTime := time.Now().Add(time.Duration(timeoutMin) * time.Minute)
	for time.Now().Before(endTime) {
		clusterJsonData, err := c.GetJSONClusterDescription(clusterID)
		if err != nil {
			return err
		}
		if clusterJsonData.DigString("state") != constants.Waiting {
			log.Logger.Infof("Cluster %s is not in waiting state anymore", clusterID)
			return nil
		}
		time.Sleep(time.Duration(interval) * time.Minute)
	}
	return fmt.Errorf("timeout for cluster stuck waiting after %d mins", timeoutMin)
}

tests/utils/exec/rosacli/cluster_service.go (approx. lines 440-470): uninstalling state matching via strings.Contains in WaitForClusterPassUninstalled

func (c *clusterService) WaitForClusterPassUninstalled(clusterID string, interval int, timeoutMin int) error {
	endTime := time.Now().Add(time.Duration(timeoutMin) * time.Minute)
	for time.Now().Before(endTime) {
		output, err := c.DescribeCluster(clusterID)
		if err != nil {
			if strings.Contains(output.String(),
				fmt.Sprintf("There is no cluster with identifier or name '%s'", clusterID)) ||
				strings.Contains(output.String(),
					fmt.Sprintf("Cluster '%s' not found", clusterID)) {
				log.Logger.Infof("Cluster %s has been deleted.", clusterID)
				return nil
			}
			log.Logger.Warnf("Issue retrieving cluster: %v.", err)
			continue
		}
		desc, err := c.ReflectClusterDescription(output)
		if err != nil {
			return err
		}
		if strings.Contains(desc.State, constants.Uninstalling) {
			time.Sleep(time.Duration(interval))
			continue
		}
		return fmt.Errorf("cluster %s is in status of %s which won't be deleted, stop waiting", clusterID, desc.State)
	}
	return fmt.Errorf("timeout for waiting for cluster deletion finished after %d mins", timeoutMin)
}

tests/utils/constants/cluster.go (lines 34-39): canonical state string constants

Ready        = "ready"
Waiting      = "waiting"
Error        = "error"
Uninstalling = "uninstalling"

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked this one against the current test contract before changing it. tests/utils/constants/cluster.go and the CLI/OCM state strings we parse are all lowercase today, so I left the comparisons exact in this patch. If we ever see mixed-case state output, I would rather normalize once at the parse boundary and add a focused regression test than broaden each comparison independently.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!

return waitForClusterReadyStatus(
clusterID,
waitInterval,
waitTimeout,
func() (*ClusterDescription, error) {
return c.DescribeClusterAndReflect(clusterID)
},
)
}

err := wait.PollUntilContextTimeout(
context.Background(),
time.Duration(interval)*time.Minute,
time.Duration(duration)*time.Minute,
waitInterval,
waitTimeout,
false,
func(context.Context) (bool, error) {
clusterListB, err := c.List()
Expand All @@ -479,6 +504,122 @@ func (c *clusterService) WaitClusterStatus(clusterID string, status string, inte
return err
}

func waitForClusterReadyStatus(
clusterID string,
interval time.Duration,
timeout time.Duration,
getDescription func() (*ClusterDescription, error),
) error {
var lastDescription *ClusterDescription
err := wait.PollUntilContextTimeout(
context.Background(),
interval,
timeout,
true,
func(context.Context) (bool, error) {
description, err := getDescription()
if err != nil {
if isClusterNotFoundErr(clusterID, err) {
return false, fmt.Errorf(
"cluster %s not found while waiting for it to become ready: %w",
clusterID, err,
)
}
return false, err
}
lastDescription = description

return evaluateReadyClusterState(clusterID, description)
},
)
if err == nil {
return nil
}

if errors.Is(err, context.DeadlineExceeded) {
return formatReadyClusterTimeout(clusterID, timeout, lastDescription)
}

return err
}

func isClusterNotFoundErr(clusterID string, err error) bool {
if err == nil {
return false
}

message := err.Error()
return strings.Contains(message,
fmt.Sprintf("There is no cluster with identifier or name '%s'", clusterID)) ||
strings.Contains(message, fmt.Sprintf("Cluster '%s' not found", clusterID))
}

func provisioningDetails(description *ClusterDescription) []string {
if description == nil {
return nil
}

details := []string{}
if description.ProvisioningErrorCode != "" {
details = append(details, fmt.Sprintf("provisioning error code: %s", description.ProvisioningErrorCode))
}
if description.ProvisioningErrorMessage != "" {
details = append(details, fmt.Sprintf("provisioning error message: %s", description.ProvisioningErrorMessage))
}
if description.FailedInflightChecks != "" {
details = append(details, fmt.Sprintf("failed inflight checks: %s", description.FailedInflightChecks))
}

return details
}

func formatReadyClusterError(clusterID string, description *ClusterDescription) error {
details := provisioningDetails(description)
if len(details) == 0 {
return fmt.Errorf("cluster %s is in %s state", clusterID, description.State)
}

return fmt.Errorf("cluster %s is in %s state (%s)",
clusterID, description.State, strings.Join(details, "; "))
}

func formatReadyClusterTimeout(clusterID string, timeout time.Duration, description *ClusterDescription) error {
timeoutMinutes := int(timeout / time.Minute)
if description == nil {
return fmt.Errorf("timeout for cluster ready waiting after %d mins", timeoutMinutes)
}

details := append([]string{fmt.Sprintf("last state: %s", description.State)}, provisioningDetails(description)...)

return fmt.Errorf("timeout for cluster ready waiting after %d mins (%s)",
timeoutMinutes, strings.Join(details, "; "))
}

func evaluateReadyClusterState(clusterID string, description *ClusterDescription) (bool, error) {
if description == nil {
return false, fmt.Errorf("no cluster description found for %s", clusterID)
}

if description.State == constants.Ready {
return true, nil
}

if strings.Contains(description.State, constants.Uninstalling) {
return false, fmt.Errorf("cluster %s is %s now. Cannot wait for it ready",
clusterID, constants.Uninstalling)
}

if strings.Contains(description.State, constants.Error) {
return false, formatReadyClusterError(clusterID, description)
}

if strings.TrimSpace(description.State) == "" {
return false, fmt.Errorf("cluster %s returned an empty state", clusterID)
}

return false, nil
}

// Wait for cluster deleted
func (c *clusterService) WaitClusterDeleted(clusterID string, interval int, duration int) error {
err := wait.PollUntilContextTimeout(
Expand Down
Loading