From 036ef390e40fe56618f249c2063328eabf63e65e Mon Sep 17 00:00:00 2001 From: Shreya Biradar Date: Thu, 18 Sep 2025 10:59:32 +0530 Subject: [PATCH 1/3] Replace sleep check with k8s API for checking pod readiness --- internal/controller/kruize_controller.go | 90 +++++++++++++++--------- 1 file changed, 57 insertions(+), 33 deletions(-) diff --git a/internal/controller/kruize_controller.go b/internal/controller/kruize_controller.go index f605f625..4e0fd62b 100755 --- a/internal/controller/kruize_controller.go +++ b/internal/controller/kruize_controller.go @@ -43,6 +43,9 @@ import ( mydomainv1alpha1 "github.com/kruize/kruize-operator/api/v1alpha1" "sigs.k8s.io/controller-runtime/pkg/log" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/selection" ) // KruizeReconciler reconciles a Kruize object @@ -139,8 +142,12 @@ func (r *KruizeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr targetNamespace = "openshift-tuning" } + labels := map[string][]string{ + "app": {"kruize", "kruize-ui-nginx", "kruize-db"}, + } + // Wait for Kruize pods to be ready - err = r.waitForKruizePods(ctx, targetNamespace, 5*time.Minute) + err = r.waitForKruizePods(ctx, targetNamespace, labels, 5*time.Minute) if err != nil { logger.Error(err, "Kruize pods not ready yet") return ctrl.Result{RequeueAfter: 30 * time.Second}, nil @@ -156,46 +163,63 @@ func (r *KruizeReconciler) isTestMode() bool { return testMode == "true" || testMode == "1" } -func (r *KruizeReconciler) waitForKruizePods(ctx context.Context, namespace string, timeout time.Duration) error { - logger := log.FromContext(ctx) +func (r *KruizeReconciler) waitForKruizePods(ctx context.Context, namespace string, labelsToMatch map[string][]string, timeout time.Duration) error { - // Skip pod waiting in test mode - if r.isTestMode() { - logger.Info("Test mode detected, skipping pod readiness check", "namespace", namespace) - return nil - } + selector := labels.NewSelector() + for key, values := range labelsToMatch { + var op selection.Operator + if len(values) == 1 { + op = selection.Equals + } else { + op = selection.In + } - requiredPods := []string{"kruize", "kruize-ui-nginx", "kruize-db"} - logger.Info("Waiting for Kruize pods to be ready", "namespace", namespace, "pods", requiredPods) + req, err := labels.NewRequirement(key, op, values) + if err != nil { + return err + } + selector = selector.Add(*req) + } - timeoutCh := time.After(timeout) - ticker := time.NewTicker(15 * time.Second) - defer ticker.Stop() + fmt.Printf("Starting to wait for Kruize pods with selector: \"%s\"\n", selector) - for { - select { - case <-timeoutCh: - return fmt.Errorf("timeout waiting for Kruize pods to be ready in namespace %s", namespace) + return wait.PollUntilContextTimeout(ctx, 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + opts := []client.ListOption{ + client.InNamespace(namespace), + client.MatchingLabelsSelector{Selector: selector}, + } - case <-ticker.C: - readyPods, totalPods, podStatus, err := r.checkKruizePodsStatus(ctx, namespace) - if err != nil { - logger.Error(err, "Failed to check pod status") - continue - } + podList := &corev1.PodList{} + if err := r.Client.List(ctx, podList, opts...); err != nil { + return false, err + } - logger.Info("Pod status check", "ready", readyPods, "total", totalPods, "namespace", namespace) - fmt.Printf("Pod status: %v\n", podStatus) + if len(podList.Items) == 0 { + return false, nil + } - // Check if we have all required pods running - if readyPods >= 3 && totalPods >= 3 { - logger.Info("All Kruize pods are ready", "readyPods", readyPods) - return nil - } + var readyPods int + for _, pod := range podList.Items { + if isPodReady(&pod) { + readyPods++ + } + } + if readyPods == len(podList.Items) { + fmt.Printf("✅ Success! All %d pods are ready!\n", len(podList.Items)) + return true, nil + } + fmt.Printf("Waiting for all pods to become ready (%d/%d)...\n", readyPods, len(podList.Items)) + return false, nil + }) +} - logger.Info("Waiting for more pods to be ready", "ready", readyPods, "total", totalPods) - } - } +func isPodReady(pod *corev1.Pod) bool { + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + return true + } + } + return false } func (r *KruizeReconciler) checkKruizePodsStatus(ctx context.Context, namespace string) (int, int, map[string]string, error) { From ac241206401f6184d6ad9622fe48df4210d6e246 Mon Sep 17 00:00:00 2001 From: Shreya Biradar Date: Mon, 22 Sep 2025 13:47:10 +0530 Subject: [PATCH 2/3] Enable exponential backoff when pods are not in ready state --- internal/controller/kruize_controller.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/controller/kruize_controller.go b/internal/controller/kruize_controller.go index 4e0fd62b..5e2f542a 100755 --- a/internal/controller/kruize_controller.go +++ b/internal/controller/kruize_controller.go @@ -150,7 +150,7 @@ func (r *KruizeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr err = r.waitForKruizePods(ctx, targetNamespace, labels, 5*time.Minute) if err != nil { logger.Error(err, "Kruize pods not ready yet") - return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + return ctrl.Result{}, err } logger.Info("All Kruize pods are ready!", "namespace", targetNamespace) @@ -183,7 +183,7 @@ func (r *KruizeReconciler) waitForKruizePods(ctx context.Context, namespace stri fmt.Printf("Starting to wait for Kruize pods with selector: \"%s\"\n", selector) - return wait.PollUntilContextTimeout(ctx, 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + return wait.PollUntilContextTimeout(ctx, 10*time.Second, timeout, true, func(ctx context.Context) (bool, error) { opts := []client.ListOption{ client.InNamespace(namespace), client.MatchingLabelsSelector{Selector: selector}, @@ -208,7 +208,7 @@ func (r *KruizeReconciler) waitForKruizePods(ctx context.Context, namespace stri fmt.Printf("✅ Success! All %d pods are ready!\n", len(podList.Items)) return true, nil } - fmt.Printf("Waiting for all pods to become ready (%d/%d)...\n", readyPods, len(podList.Items)) + fmt.Printf("Waiting for Kruize pods to become ready (%d/%d)...\n", readyPods, len(podList.Items)) return false, nil }) } From 21c148f3f185395bd08f7d0221a15f23c3ef87b3 Mon Sep 17 00:00:00 2001 From: Shreya Biradar Date: Tue, 23 Sep 2025 14:40:55 +0530 Subject: [PATCH 3/3] Skip pod waiting in test mode --- internal/controller/kruize_controller.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/internal/controller/kruize_controller.go b/internal/controller/kruize_controller.go index 5e2f542a..0e8bea52 100755 --- a/internal/controller/kruize_controller.go +++ b/internal/controller/kruize_controller.go @@ -164,6 +164,13 @@ func (r *KruizeReconciler) isTestMode() bool { } func (r *KruizeReconciler) waitForKruizePods(ctx context.Context, namespace string, labelsToMatch map[string][]string, timeout time.Duration) error { + logger := log.FromContext(ctx) + + // Skip pod waiting in test mode + if r.isTestMode() { + logger.Info("Test mode detected, skipping pod readiness check", "namespace", namespace) + return nil + } selector := labels.NewSelector() for key, values := range labelsToMatch {