From 36ebd632e187bfd6a081f9dd765026af904aad81 Mon Sep 17 00:00:00 2001 From: Ryan Cook Date: Tue, 16 Jun 2026 13:39:30 -0400 Subject: [PATCH 1/3] feat: surface init container failures, plugin compatibility, and version downgrade warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a Claw deployment fails due to init container crashes (e.g., a plugin requiring a newer OpenClaw version than spec.version), the operator previously reported only "Waiting for deployments to become ready" — forcing users to manually dig through pod logs to find the root cause. This was observed in production when spec.version 2026.6.5 was deployed against a PVC configured for 2026.6.8, causing the @openclaw/anthropic-vertex-provider plugin's init-plugins container to crash-loop with no actionable status on the CR. This commit adds three warning-only status conditions: 1. Ready condition enrichment: when deployments are not ready, the operator inspects pods for init container failures (non-zero exit or CrashLoopBackOff) and surfaces the actual error message in the Ready condition with reason InitContainerFailure. 2. PluginCompatibility condition: when spec.version is older than a plugin's minimum required version (declared via PluginMinVersion in knownProviders), a warning condition is set. This does not block deployment — users may have compatible plugins cached on PVC. 3. VersionDowngrade condition: when spec.version is older than status.lastDeployedVersion, a warning is set about potential PVC data incompatibility. Downgrades remain fully permitted. Co-Authored-By: Claude Opus 4.6 --- api/v1alpha1/claw_types.go | 26 ++- .../bases/claw.sandbox.redhat.com_claws.yaml | 5 + config/rbac/role.yaml | 1 + internal/controller/claw_plugins.go | 63 ++++++ internal/controller/claw_plugins_test.go | 87 ++++++++ internal/controller/claw_providers.go | 19 +- .../controller/claw_resource_controller.go | 11 + internal/controller/claw_status.go | 114 ++++++++++- internal/controller/claw_status_test.go | 190 ++++++++++++++++++ 9 files changed, 499 insertions(+), 17 deletions(-) diff --git a/api/v1alpha1/claw_types.go b/api/v1alpha1/claw_types.go index e0672719..57a95f6f 100644 --- a/api/v1alpha1/claw_types.go +++ b/api/v1alpha1/claw_types.go @@ -82,6 +82,8 @@ const ( ConditionTypeWebSearchConfigured = "WebSearchConfigured" ConditionTypeIdle = "Idle" ConditionTypeRestrictionsEnforced = "RestrictionsEnforced" + ConditionTypePluginCompatibility = "PluginCompatibility" + ConditionTypeVersionDowngrade = "VersionDowngrade" ) // Annotation keys used on pod templates to trigger rollouts on config changes. @@ -97,14 +99,17 @@ const ( // Condition reasons for Claw status. const ( - ConditionReasonReady = "Ready" - ConditionReasonProvisioning = "Provisioning" - ConditionReasonResolved = "Resolved" - ConditionReasonValidationFailed = "ValidationFailed" - ConditionReasonConfigured = "Configured" - ConditionReasonConfigFailed = "ConfigFailed" - ConditionReasonIdle = "Idle" - ConditionReasonIdledByRequest = "IdledByRequest" + ConditionReasonReady = "Ready" + ConditionReasonProvisioning = "Provisioning" + ConditionReasonResolved = "Resolved" + ConditionReasonValidationFailed = "ValidationFailed" + ConditionReasonConfigured = "Configured" + ConditionReasonConfigFailed = "ConfigFailed" + ConditionReasonIdle = "Idle" + ConditionReasonIdledByRequest = "IdledByRequest" + ConditionReasonIncompatible = "Incompatible" + ConditionReasonVersionDowngrade = "VersionDowngrade" + ConditionReasonInitContainerFailure = "InitContainerFailure" ) // SecretRefEntry references a specific key in a Secret. @@ -743,6 +748,11 @@ type ClawStatus struct { // GatewayURL is the HTTPS URL for accessing the Claw gateway, including the auth token fragment when applicable // +optional GatewayURL string `json:"gatewayURL,omitempty"` + + // LastDeployedVersion records the spec.version that was last successfully deployed. + // Used to detect version downgrades that may cause PVC data incompatibility. + // +optional + LastDeployedVersion string `json:"lastDeployedVersion,omitempty"` } // +kubebuilder:object:root=true diff --git a/config/crd/bases/claw.sandbox.redhat.com_claws.yaml b/config/crd/bases/claw.sandbox.redhat.com_claws.yaml index c0cb6e98..939790ec 100644 --- a/config/crd/bases/claw.sandbox.redhat.com_claws.yaml +++ b/config/crd/bases/claw.sandbox.redhat.com_claws.yaml @@ -1060,6 +1060,11 @@ spec: description: GatewayURL is the HTTPS URL for accessing the Claw gateway, including the auth token fragment when applicable type: string + lastDeployedVersion: + description: |- + LastDeployedVersion records the spec.version that was last successfully deployed. + Used to detect version downgrades that may cause PVC data incompatibility. + type: string url: description: |- Deprecated: Use GatewayURL instead. Will be removed in a future version. diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index cd8d392a..69cea4fa 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -22,6 +22,7 @@ rules: resources: - pods verbs: + - get - list - apiGroups: - "" diff --git a/internal/controller/claw_plugins.go b/internal/controller/claw_plugins.go index 27bf66cc..b7aa6689 100644 --- a/internal/controller/claw_plugins.go +++ b/internal/controller/claw_plugins.go @@ -18,6 +18,7 @@ package controller import ( "fmt" + "strconv" "strings" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -256,3 +257,65 @@ func pluginsNoProxy(instance *clawv1alpha1.Claw) string { } return base } + +// compareCalver compares two calver version strings (e.g. "2026.6.5"). +// Returns -1 if a < b, 0 if a == b, 1 if a > b. +// Returns 0 for malformed input (fail-open). +func compareCalver(a, b string) int { + aParts := strings.Split(a, ".") + bParts := strings.Split(b, ".") + + maxLen := len(aParts) + if len(bParts) > maxLen { + maxLen = len(bParts) + } + + for i := range maxLen { + var aVal, bVal int + var err error + if i < len(aParts) { + aVal, err = strconv.Atoi(aParts[i]) + if err != nil { + return 0 + } + } + if i < len(bParts) { + bVal, err = strconv.Atoi(bParts[i]) + if err != nil { + return 0 + } + } + if aVal < bVal { + return -1 + } + if aVal > bVal { + return 1 + } + } + return 0 +} + +// checkPluginCompatibility checks whether any implicitly required plugin +// has a minimum version that exceeds spec.version. Returns a warning +// message or "" if all plugins are compatible. +func checkPluginCompatibility(instance *clawv1alpha1.Claw) string { + if instance.Spec.Version == "" { + return "" + } + for _, cred := range instance.Spec.Credentials { + if !usesVertexSDK(cred) { + continue + } + defaults, ok := knownProviders[cred.Provider] + if !ok || defaults.VertexPlugin == "" || defaults.PluginMinVersion == "" { + continue + } + if compareCalver(instance.Spec.Version, defaults.PluginMinVersion) < 0 { + return fmt.Sprintf( + "plugin %s requires OpenClaw >= %s, but spec.version is %s", + defaults.VertexPlugin, defaults.PluginMinVersion, instance.Spec.Version, + ) + } + } + return "" +} diff --git a/internal/controller/claw_plugins_test.go b/internal/controller/claw_plugins_test.go index a9d30e51..e88bba3d 100644 --- a/internal/controller/claw_plugins_test.go +++ b/internal/controller/claw_plugins_test.go @@ -794,3 +794,90 @@ func TestPluginsIntegration(t *testing.T) { assert.NotEqual(t, hash1, hash2, "config hash should change when plugins change") }) } + +const ( + testVersionOld = "2026.6.5" + testVersionMinPlugin = "2026.6.8" +) + +func TestCompareCalver(t *testing.T) { + tests := []struct { + name string + a, b string + want int + }{ + {"a less than b", testVersionOld, testVersionMinPlugin, -1}, + {"a greater than b", testVersionMinPlugin, testVersionOld, 1}, + {"equal", testVersionMinPlugin, testVersionMinPlugin, 0}, + {"numeric not lexicographic", "2026.10.1", "2026.9.30", 1}, + {"year difference", "2027.1.1", "2026.12.31", 1}, + {"different segment count", "2026.6", "2026.6.0", 0}, + {"malformed a", "invalid", testVersionMinPlugin, 0}, + {"malformed b", testVersionOld, "bad", 0}, + {"both empty", "", "", 0}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, compareCalver(tt.a, tt.b)) + }) + } +} + +func TestCheckPluginCompatibility(t *testing.T) { + t.Run("no version set", func(t *testing.T) { + instance := &clawv1alpha1.Claw{} + instance.Spec.Credentials = []clawv1alpha1.CredentialSpec{ + {Name: "vertex", Type: clawv1alpha1.CredentialTypeGCP, Provider: "anthropic", + GCP: &clawv1alpha1.GCPConfig{Project: "proj"}}, + } + assert.Empty(t, checkPluginCompatibility(instance)) + }) + + t.Run("compatible version", func(t *testing.T) { + instance := &clawv1alpha1.Claw{} + instance.Spec.Version = testVersionMinPlugin + instance.Spec.Credentials = []clawv1alpha1.CredentialSpec{ + {Name: "vertex", Type: clawv1alpha1.CredentialTypeGCP, Provider: "anthropic", + GCP: &clawv1alpha1.GCPConfig{Project: "proj"}}, + } + assert.Empty(t, checkPluginCompatibility(instance)) + }) + + t.Run("incompatible version", func(t *testing.T) { + instance := &clawv1alpha1.Claw{} + instance.Spec.Version = testVersionOld + instance.Spec.Credentials = []clawv1alpha1.CredentialSpec{ + {Name: "vertex", Type: clawv1alpha1.CredentialTypeGCP, Provider: "anthropic", + GCP: &clawv1alpha1.GCPConfig{Project: "proj"}}, + } + result := checkPluginCompatibility(instance) + assert.Contains(t, result, testVersionMinPlugin) + assert.Contains(t, result, testVersionOld) + assert.Contains(t, result, "@openclaw/anthropic-vertex-provider") + }) + + t.Run("non-vertex credential", func(t *testing.T) { + instance := &clawv1alpha1.Claw{} + instance.Spec.Version = testVersionOld + instance.Spec.Credentials = []clawv1alpha1.CredentialSpec{ + {Name: "api", Type: clawv1alpha1.CredentialTypeAPIKey, Provider: "anthropic"}, + } + assert.Empty(t, checkPluginCompatibility(instance)) + }) + + t.Run("google gcp has no vertex plugin", func(t *testing.T) { + instance := &clawv1alpha1.Claw{} + instance.Spec.Version = testVersionOld + instance.Spec.Credentials = []clawv1alpha1.CredentialSpec{ + {Name: "vertex", Type: clawv1alpha1.CredentialTypeGCP, Provider: "google", + GCP: &clawv1alpha1.GCPConfig{Project: "proj"}}, + } + assert.Empty(t, checkPluginCompatibility(instance)) + }) + + t.Run("no credentials", func(t *testing.T) { + instance := &clawv1alpha1.Claw{} + instance.Spec.Version = testVersionOld + assert.Empty(t, checkPluginCompatibility(instance)) + }) +} diff --git a/internal/controller/claw_providers.go b/internal/controller/claw_providers.go index 7b43e691..d9f73ed0 100644 --- a/internal/controller/claw_providers.go +++ b/internal/controller/claw_providers.go @@ -68,6 +68,10 @@ type providerDefaults struct { // manifest. Empty means no config entry is needed. VertexPluginID string + // PluginMinVersion is the minimum OpenClaw calver version (e.g. "2026.6.8") + // required for VertexPlugin to work. Empty means no minimum. + PluginMinVersion string + // Models is the hardcoded model catalog for this provider. // Order matters: the first model becomes the default primary when this // provider is the first configured credential in the Claw CR; remaining @@ -102,13 +106,14 @@ var knownProviders = map[string]providerDefaults{ }, }, "anthropic": { - CredType: clawv1alpha1.CredentialTypeAPIKey, - Domain: "api.anthropic.com", - Header: "x-api-key", - API: "anthropic-messages", - VertexAPI: "anthropic-messages", - VertexPlugin: "@openclaw/anthropic-vertex-provider", - VertexPluginID: "anthropic-vertex", + CredType: clawv1alpha1.CredentialTypeAPIKey, + Domain: "api.anthropic.com", + Header: "x-api-key", + API: "anthropic-messages", + VertexAPI: "anthropic-messages", + VertexPlugin: "@openclaw/anthropic-vertex-provider", + VertexPluginID: "anthropic-vertex", + PluginMinVersion: "2026.6.8", Models: []modelEntry{ {Name: "claude-sonnet-4-6", Alias: "Claude Sonnet 4.6"}, {Name: "claude-opus-4-8", Alias: "Claude Opus 4.8"}, diff --git a/internal/controller/claw_resource_controller.go b/internal/controller/claw_resource_controller.go index 1d959620..bbe1db85 100644 --- a/internal/controller/claw_resource_controller.go +++ b/internal/controller/claw_resource_controller.go @@ -409,6 +409,7 @@ type ClawResourceReconciler struct { // +kubebuilder:rbac:groups=claw.sandbox.redhat.com,resources=claws/finalizers,verbs=update // +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch +// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=networking.k8s.io,resources=networkpolicies,verbs=get;list;watch;create;update;patch;delete @@ -925,12 +926,22 @@ func (r *ClawResourceReconciler) configureDeployments( } } if !pluginInstallationDisabled(instance) { + if warning := checkPluginCompatibility(instance); warning != "" { + setCondition(instance, clawv1alpha1.ConditionTypePluginCompatibility, + metav1.ConditionFalse, clawv1alpha1.ConditionReasonIncompatible, warning) + } else { + meta.RemoveStatusCondition(&instance.Status.Conditions, + clawv1alpha1.ConditionTypePluginCompatibility) + } plugins := effectivePlugins(instance) if len(plugins) > 0 { if err := configurePluginsInitContainer(objects, instance, plugins); err != nil { return fmt.Errorf("failed to configure plugins init container: %w", err) } } + } else { + meta.RemoveStatusCondition(&instance.Status.Conditions, + clawv1alpha1.ConditionTypePluginCompatibility) } if err := configureAgentFiles(objects, instance); err != nil { return fmt.Errorf("failed to configure agent files: %w", err) diff --git a/internal/controller/claw_status.go b/internal/controller/claw_status.go index 45688187..1d601665 100644 --- a/internal/controller/claw_status.go +++ b/internal/controller/claw_status.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "net/url" + "strings" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -171,6 +172,81 @@ func setReadyCondition(instance *clawv1alpha1.Claw, ready bool, pendingDeploymen }) } +// setReadyConditionWithDetail sets the Ready condition, enriching the message +// with init container failure details when available. +func setReadyConditionWithDetail( + instance *clawv1alpha1.Claw, + ready bool, + pendingDeployments []string, + initFailureDetail string, +) { + if ready || initFailureDetail == "" { + setReadyCondition(instance, ready, pendingDeployments) + return + } + + meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ + Type: clawv1alpha1.ConditionTypeReady, + Status: metav1.ConditionFalse, + Reason: clawv1alpha1.ConditionReasonInitContainerFailure, + Message: initFailureDetail, + ObservedGeneration: instance.Generation, + }) +} + +// checkPodInitFailures inspects pods owned by the named Deployment for +// init container failures (non-zero exit or CrashLoopBackOff). Returns +// a human-readable description or "" if no failures are found. +func (r *ClawResourceReconciler) checkPodInitFailures( + ctx context.Context, + namespace, deploymentName string, +) string { + deployment := &appsv1.Deployment{} + if err := r.Get(ctx, client.ObjectKey{ + Namespace: namespace, Name: deploymentName, + }, deployment); err != nil { + return "" + } + + selector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector) + if err != nil { + return "" + } + + pods := &corev1.PodList{} + if err := r.List(ctx, pods, + client.InNamespace(namespace), + client.MatchingLabelsSelector{Selector: selector}, + ); err != nil { + return "" + } + + var failures []string + for i := range pods.Items { + for _, cs := range pods.Items[i].Status.InitContainerStatuses { + if t := cs.State.Terminated; t != nil && t.ExitCode != 0 { + msg := t.Reason + if t.Message != "" { + msg = t.Message + } + failures = append(failures, fmt.Sprintf( + "init container %q failed (exit %d): %s", + cs.Name, t.ExitCode, msg)) + } else if w := cs.State.Waiting; w != nil && + w.Reason == "CrashLoopBackOff" { + failures = append(failures, fmt.Sprintf( + "init container %q in CrashLoopBackOff: %s", + cs.Name, w.Message)) + } + } + } + + if len(failures) == 0 { + return "" + } + return strings.Join(failures, "; ") +} + // getGatewayToken fetches the gateway token from the gateway token Secret and Base64-decodes it. // Returns the token string, or empty string if the Secret cannot be read. func (r *ClawResourceReconciler) getGatewayToken(ctx context.Context, namespace, instanceName string) string { @@ -224,8 +300,19 @@ func (r *ClawResourceReconciler) updateStatus(ctx context.Context, instance *cla return fmt.Errorf("failed to check deployment readiness: %w", err) } - // Set Ready condition - setReadyCondition(instance, ready, pending) + // When deployments are not ready, inspect pods for init container failures + var initFailureDetail string + if !ready { + for _, depName := range pending { + if detail := r.checkPodInitFailures(ctx, instance.Namespace, depName); detail != "" { + initFailureDetail = detail + break + } + } + } + + // Set Ready condition (with init failure detail if available) + setReadyConditionWithDetail(instance, ready, pending, initFailureDetail) // Expose gateway secret name in status instance.Status.GatewayTokenSecretRef = getGatewaySecretName(instance.Name) @@ -253,6 +340,29 @@ func (r *ClawResourceReconciler) updateStatus(ctx context.Context, instance *cla instance.Status.GatewayURL = "" } + // Version downgrade detection + if instance.Spec.Version != "" && + instance.Status.LastDeployedVersion != "" && + compareCalver(instance.Spec.Version, instance.Status.LastDeployedVersion) < 0 { + setCondition(instance, + clawv1alpha1.ConditionTypeVersionDowngrade, + metav1.ConditionTrue, + clawv1alpha1.ConditionReasonVersionDowngrade, + fmt.Sprintf( + "spec.version %s is older than previously deployed version %s; "+ + "PVC data may be incompatible", + instance.Spec.Version, instance.Status.LastDeployedVersion), + ) + } else { + meta.RemoveStatusCondition(&instance.Status.Conditions, + clawv1alpha1.ConditionTypeVersionDowngrade) + } + + // Track last deployed version when ready + if ready && instance.Spec.Version != "" { + instance.Status.LastDeployedVersion = instance.Spec.Version + } + recordClawMetrics(instance) // Update status subresource diff --git a/internal/controller/claw_status_test.go b/internal/controller/claw_status_test.go index 441d5045..9da5434c 100644 --- a/internal/controller/claw_status_test.go +++ b/internal/controller/claw_status_test.go @@ -1247,3 +1247,193 @@ func TestURLConstructionWithTokenFragment(t *testing.T) { assert.Contains(t, result, "#token=") }) } + +func TestSetReadyConditionWithDetail(t *testing.T) { + t.Run("ready true ignores detail", func(t *testing.T) { + instance := &clawv1alpha1.Claw{} + setReadyConditionWithDetail(instance, true, nil, "some detail") + condition := meta.FindStatusCondition(instance.Status.Conditions, clawv1alpha1.ConditionTypeReady) + require.NotNil(t, condition) + assert.Equal(t, metav1.ConditionTrue, condition.Status) + assert.Equal(t, clawv1alpha1.ConditionReasonReady, condition.Reason) + }) + + t.Run("not ready with init failure detail", func(t *testing.T) { + instance := &clawv1alpha1.Claw{} + detail := `init container "init-plugins" failed (exit 1): Error` + setReadyConditionWithDetail(instance, false, []string{"instance"}, detail) + condition := meta.FindStatusCondition(instance.Status.Conditions, clawv1alpha1.ConditionTypeReady) + require.NotNil(t, condition) + assert.Equal(t, metav1.ConditionFalse, condition.Status) + assert.Equal(t, clawv1alpha1.ConditionReasonInitContainerFailure, condition.Reason) + assert.Equal(t, detail, condition.Message) + }) + + t.Run("not ready without detail falls back to provisioning", func(t *testing.T) { + instance := &clawv1alpha1.Claw{} + setReadyConditionWithDetail(instance, false, []string{"instance"}, "") + condition := meta.FindStatusCondition(instance.Status.Conditions, clawv1alpha1.ConditionTypeReady) + require.NotNil(t, condition) + assert.Equal(t, metav1.ConditionFalse, condition.Status) + assert.Equal(t, clawv1alpha1.ConditionReasonProvisioning, condition.Reason) + }) +} + +func TestVersionDowngradeDetection(t *testing.T) { + t.Run("should set VersionDowngrade condition on downgrade", func(t *testing.T) { + t.Cleanup(func() { + deleteAndWaitAllResources(t, namespace) + }) + + ctx := context.Background() + secret := createTestAPIKeySecret(aiModelSecret, namespace, aiModelSecretKey, aiModelSecretValue) + require.NoError(t, k8sClient.Create(ctx, secret)) + + instance := &clawv1alpha1.Claw{} + instance.Name = testInstanceName + instance.Namespace = namespace + instance.Spec.Credentials = testCredentials() + instance.Spec.Version = "2026.6.8" + require.NoError(t, k8sClient.Create(ctx, instance)) + + reconciler := createClawReconciler() + reconcileClaw(t, ctx, reconciler, testInstanceName, namespace) + setCoreDeploymentsAvailable(t, ctx, testInstanceName, namespace) + reconcileClaw(t, ctx, reconciler, testInstanceName, namespace) + + // Verify version was recorded + updated := &clawv1alpha1.Claw{} + require.NoError(t, k8sClient.Get(ctx, + client.ObjectKey{Name: testInstanceName, Namespace: namespace}, updated)) + assert.Equal(t, "2026.6.8", updated.Status.LastDeployedVersion) + + // Downgrade + updated.Spec.Version = "2026.6.5" + require.NoError(t, k8sClient.Update(ctx, updated)) + reconcileClaw(t, ctx, reconciler, testInstanceName, namespace) + + require.NoError(t, k8sClient.Get(ctx, + client.ObjectKey{Name: testInstanceName, Namespace: namespace}, updated)) + condition := meta.FindStatusCondition(updated.Status.Conditions, + clawv1alpha1.ConditionTypeVersionDowngrade) + require.NotNil(t, condition, "VersionDowngrade condition should be set") + assert.Equal(t, metav1.ConditionTrue, condition.Status) + assert.Contains(t, condition.Message, "2026.6.5") + assert.Contains(t, condition.Message, "2026.6.8") + }) + + t.Run("should clear VersionDowngrade condition on upgrade", func(t *testing.T) { + t.Cleanup(func() { + deleteAndWaitAllResources(t, namespace) + }) + + ctx := context.Background() + secret := createTestAPIKeySecret(aiModelSecret, namespace, aiModelSecretKey, aiModelSecretValue) + require.NoError(t, k8sClient.Create(ctx, secret)) + + instance := &clawv1alpha1.Claw{} + instance.Name = testInstanceName + instance.Namespace = namespace + instance.Spec.Credentials = testCredentials() + instance.Spec.Version = "2026.6.8" + require.NoError(t, k8sClient.Create(ctx, instance)) + + reconciler := createClawReconciler() + reconcileClaw(t, ctx, reconciler, testInstanceName, namespace) + setCoreDeploymentsAvailable(t, ctx, testInstanceName, namespace) + reconcileClaw(t, ctx, reconciler, testInstanceName, namespace) + + // Downgrade to trigger condition + updated := &clawv1alpha1.Claw{} + require.NoError(t, k8sClient.Get(ctx, + client.ObjectKey{Name: testInstanceName, Namespace: namespace}, updated)) + updated.Spec.Version = "2026.6.5" + require.NoError(t, k8sClient.Update(ctx, updated)) + reconcileClaw(t, ctx, reconciler, testInstanceName, namespace) + + // Upgrade past the original version + require.NoError(t, k8sClient.Get(ctx, + client.ObjectKey{Name: testInstanceName, Namespace: namespace}, updated)) + updated.Spec.Version = "2026.6.10" + require.NoError(t, k8sClient.Update(ctx, updated)) + reconcileClaw(t, ctx, reconciler, testInstanceName, namespace) + + require.NoError(t, k8sClient.Get(ctx, + client.ObjectKey{Name: testInstanceName, Namespace: namespace}, updated)) + condition := meta.FindStatusCondition(updated.Status.Conditions, + clawv1alpha1.ConditionTypeVersionDowngrade) + assert.Nil(t, condition, "VersionDowngrade condition should be removed") + }) +} + +func TestInitContainerFailureSurfacing(t *testing.T) { + t.Run("should surface init container failure in Ready condition", func(t *testing.T) { + t.Cleanup(func() { + deleteAndWaitAllResources(t, namespace) + }) + + ctx := context.Background() + secret := createTestAPIKeySecret(aiModelSecret, namespace, aiModelSecretKey, aiModelSecretValue) + require.NoError(t, k8sClient.Create(ctx, secret)) + + instance := &clawv1alpha1.Claw{} + instance.Name = testInstanceName + instance.Namespace = namespace + instance.Spec.Credentials = testCredentials() + require.NoError(t, k8sClient.Create(ctx, instance)) + + reconciler := createClawReconciler() + reconcileClaw(t, ctx, reconciler, testInstanceName, namespace) + + // Create a pod with init container failure status + deployment := &appsv1.Deployment{} + waitFor(t, timeout, interval, func() bool { + return k8sClient.Get(ctx, client.ObjectKey{ + Name: testInstanceName, Namespace: namespace, + }, deployment) == nil + }, "deployment should exist") + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName + "-test-pod", + Namespace: namespace, + Labels: deployment.Spec.Selector.MatchLabels, + }, + Spec: corev1.PodSpec{ + InitContainers: []corev1.Container{ + {Name: "init-plugins", Image: "busybox"}, + }, + Containers: []corev1.Container{ + {Name: "gateway", Image: "busybox"}, + }, + }, + } + require.NoError(t, k8sClient.Create(ctx, pod)) + + pod.Status.InitContainerStatuses = []corev1.ContainerStatus{ + { + Name: "init-plugins", + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 1, + Reason: "Error", + }, + }, + }, + } + require.NoError(t, k8sClient.Status().Update(ctx, pod)) + + reconcileClaw(t, ctx, reconciler, testInstanceName, namespace) + + updated := &clawv1alpha1.Claw{} + require.NoError(t, k8sClient.Get(ctx, + client.ObjectKey{Name: testInstanceName, Namespace: namespace}, updated)) + condition := meta.FindStatusCondition(updated.Status.Conditions, + clawv1alpha1.ConditionTypeReady) + require.NotNil(t, condition) + assert.Equal(t, metav1.ConditionFalse, condition.Status) + assert.Equal(t, clawv1alpha1.ConditionReasonInitContainerFailure, condition.Reason) + assert.Contains(t, condition.Message, "init-plugins") + assert.Contains(t, condition.Message, "exit 1") + }) +} From 46d6076beac130dda0764d649bb0d8b2dc88be02 Mon Sep 17 00:00:00 2001 From: Ryan Cook Date: Tue, 16 Jun 2026 14:03:37 -0400 Subject: [PATCH 2/3] fix: address PR review feedback on error handling and condition gating - compareCalver returns (int, bool) so callers can distinguish malformed versions from equal ones instead of silently treating parse failures as "equal" - Plugin compatibility check moved outside the plugin-installation gate so warnings surface regardless of config management mode - checkPodInitFailures returns (string, error) so inspection failures are logged rather than silently swallowed Co-Authored-By: Claude Opus 4.6 --- internal/controller/claw_plugins.go | 23 ++++++++++----- internal/controller/claw_plugins_test.go | 29 ++++++++++--------- .../controller/claw_resource_controller.go | 17 +++++------ internal/controller/claw_status.go | 23 ++++++++++----- 4 files changed, 53 insertions(+), 39 deletions(-) diff --git a/internal/controller/claw_plugins.go b/internal/controller/claw_plugins.go index b7aa6689..5765e3cb 100644 --- a/internal/controller/claw_plugins.go +++ b/internal/controller/claw_plugins.go @@ -260,8 +260,8 @@ func pluginsNoProxy(instance *clawv1alpha1.Claw) string { // compareCalver compares two calver version strings (e.g. "2026.6.5"). // Returns -1 if a < b, 0 if a == b, 1 if a > b. -// Returns 0 for malformed input (fail-open). -func compareCalver(a, b string) int { +// The bool is false when either string is malformed (non-numeric segments). +func compareCalver(a, b string) (int, bool) { aParts := strings.Split(a, ".") bParts := strings.Split(b, ".") @@ -276,23 +276,23 @@ func compareCalver(a, b string) int { if i < len(aParts) { aVal, err = strconv.Atoi(aParts[i]) if err != nil { - return 0 + return 0, false } } if i < len(bParts) { bVal, err = strconv.Atoi(bParts[i]) if err != nil { - return 0 + return 0, false } } if aVal < bVal { - return -1 + return -1, true } if aVal > bVal { - return 1 + return 1, true } } - return 0 + return 0, true } // checkPluginCompatibility checks whether any implicitly required plugin @@ -310,7 +310,14 @@ func checkPluginCompatibility(instance *clawv1alpha1.Claw) string { if !ok || defaults.VertexPlugin == "" || defaults.PluginMinVersion == "" { continue } - if compareCalver(instance.Spec.Version, defaults.PluginMinVersion) < 0 { + cmp, ok := compareCalver(instance.Spec.Version, defaults.PluginMinVersion) + if !ok { + return fmt.Sprintf( + "cannot check plugin compatibility: spec.version %q is not a valid CalVer string", + instance.Spec.Version, + ) + } + if cmp < 0 { return fmt.Sprintf( "plugin %s requires OpenClaw >= %s, but spec.version is %s", defaults.VertexPlugin, defaults.PluginMinVersion, instance.Spec.Version, diff --git a/internal/controller/claw_plugins_test.go b/internal/controller/claw_plugins_test.go index e88bba3d..270ed035 100644 --- a/internal/controller/claw_plugins_test.go +++ b/internal/controller/claw_plugins_test.go @@ -802,23 +802,26 @@ const ( func TestCompareCalver(t *testing.T) { tests := []struct { - name string - a, b string - want int + name string + a, b string + want int + wantOK bool }{ - {"a less than b", testVersionOld, testVersionMinPlugin, -1}, - {"a greater than b", testVersionMinPlugin, testVersionOld, 1}, - {"equal", testVersionMinPlugin, testVersionMinPlugin, 0}, - {"numeric not lexicographic", "2026.10.1", "2026.9.30", 1}, - {"year difference", "2027.1.1", "2026.12.31", 1}, - {"different segment count", "2026.6", "2026.6.0", 0}, - {"malformed a", "invalid", testVersionMinPlugin, 0}, - {"malformed b", testVersionOld, "bad", 0}, - {"both empty", "", "", 0}, + {"a less than b", testVersionOld, testVersionMinPlugin, -1, true}, + {"a greater than b", testVersionMinPlugin, testVersionOld, 1, true}, + {"equal", testVersionMinPlugin, testVersionMinPlugin, 0, true}, + {"numeric not lexicographic", "2026.10.1", "2026.9.30", 1, true}, + {"year difference", "2027.1.1", "2026.12.31", 1, true}, + {"different segment count", "2026.6", "2026.6.0", 0, true}, + {"malformed a", "invalid", testVersionMinPlugin, 0, false}, + {"malformed b", testVersionOld, "bad", 0, false}, + {"both empty", "", "", 0, false}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - assert.Equal(t, tt.want, compareCalver(tt.a, tt.b)) + got, ok := compareCalver(tt.a, tt.b) + assert.Equal(t, tt.want, got) + assert.Equal(t, tt.wantOK, ok) }) } } diff --git a/internal/controller/claw_resource_controller.go b/internal/controller/claw_resource_controller.go index bbe1db85..e903637c 100644 --- a/internal/controller/claw_resource_controller.go +++ b/internal/controller/claw_resource_controller.go @@ -925,23 +925,20 @@ func (r *ClawResourceReconciler) configureDeployments( return fmt.Errorf("failed to configure metrics sidecar: %w", err) } } + if warning := checkPluginCompatibility(instance); warning != "" { + setCondition(instance, clawv1alpha1.ConditionTypePluginCompatibility, + metav1.ConditionFalse, clawv1alpha1.ConditionReasonIncompatible, warning) + } else { + meta.RemoveStatusCondition(&instance.Status.Conditions, + clawv1alpha1.ConditionTypePluginCompatibility) + } if !pluginInstallationDisabled(instance) { - if warning := checkPluginCompatibility(instance); warning != "" { - setCondition(instance, clawv1alpha1.ConditionTypePluginCompatibility, - metav1.ConditionFalse, clawv1alpha1.ConditionReasonIncompatible, warning) - } else { - meta.RemoveStatusCondition(&instance.Status.Conditions, - clawv1alpha1.ConditionTypePluginCompatibility) - } plugins := effectivePlugins(instance) if len(plugins) > 0 { if err := configurePluginsInitContainer(objects, instance, plugins); err != nil { return fmt.Errorf("failed to configure plugins init container: %w", err) } } - } else { - meta.RemoveStatusCondition(&instance.Status.Conditions, - clawv1alpha1.ConditionTypePluginCompatibility) } if err := configureAgentFiles(objects, instance); err != nil { return fmt.Errorf("failed to configure agent files: %w", err) diff --git a/internal/controller/claw_status.go b/internal/controller/claw_status.go index 1d601665..9a2b15a3 100644 --- a/internal/controller/claw_status.go +++ b/internal/controller/claw_status.go @@ -200,17 +200,17 @@ func setReadyConditionWithDetail( func (r *ClawResourceReconciler) checkPodInitFailures( ctx context.Context, namespace, deploymentName string, -) string { +) (string, error) { deployment := &appsv1.Deployment{} if err := r.Get(ctx, client.ObjectKey{ Namespace: namespace, Name: deploymentName, }, deployment); err != nil { - return "" + return "", fmt.Errorf("get deployment %s: %w", deploymentName, err) } selector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector) if err != nil { - return "" + return "", fmt.Errorf("build selector for %s: %w", deploymentName, err) } pods := &corev1.PodList{} @@ -218,7 +218,7 @@ func (r *ClawResourceReconciler) checkPodInitFailures( client.InNamespace(namespace), client.MatchingLabelsSelector{Selector: selector}, ); err != nil { - return "" + return "", fmt.Errorf("list pods for %s: %w", deploymentName, err) } var failures []string @@ -242,9 +242,9 @@ func (r *ClawResourceReconciler) checkPodInitFailures( } if len(failures) == 0 { - return "" + return "", nil } - return strings.Join(failures, "; ") + return strings.Join(failures, "; "), nil } // getGatewayToken fetches the gateway token from the gateway token Secret and Base64-decodes it. @@ -301,10 +301,16 @@ func (r *ClawResourceReconciler) updateStatus(ctx context.Context, instance *cla } // When deployments are not ready, inspect pods for init container failures + logger := log.FromContext(ctx) var initFailureDetail string if !ready { for _, depName := range pending { - if detail := r.checkPodInitFailures(ctx, instance.Namespace, depName); detail != "" { + detail, err := r.checkPodInitFailures(ctx, instance.Namespace, depName) + if err != nil { + logger.Error(err, "failed to inspect init container failures", "deployment", depName) + continue + } + if detail != "" { initFailureDetail = detail break } @@ -341,9 +347,10 @@ func (r *ClawResourceReconciler) updateStatus(ctx context.Context, instance *cla } // Version downgrade detection + cmp, cmpOK := compareCalver(instance.Spec.Version, instance.Status.LastDeployedVersion) if instance.Spec.Version != "" && instance.Status.LastDeployedVersion != "" && - compareCalver(instance.Spec.Version, instance.Status.LastDeployedVersion) < 0 { + cmpOK && cmp < 0 { setCondition(instance, clawv1alpha1.ConditionTypeVersionDowngrade, metav1.ConditionTrue, From f086d4d2ef844ba4e94f80e70ad9c8fa13f48e1e Mon Sep 17 00:00:00 2001 From: Ryan Cook Date: Wed, 17 Jun 2026 08:37:57 -0400 Subject: [PATCH 3/3] fix: make LastDeployedVersion a high-water mark so version downgrade warning persists The VersionDowngrade condition was being cleared on the next reconcile because LastDeployedVersion was unconditionally overwritten to the downgraded version when deployments became ready. Now it only updates upward, preserving the previous version for comparison. Co-Authored-By: Claude Opus 4.6 --- internal/controller/claw_plugins_test.go | 8 +++----- internal/controller/claw_status.go | 8 ++++++-- internal/controller/claw_status_test.go | 11 +++++++++++ 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/internal/controller/claw_plugins_test.go b/internal/controller/claw_plugins_test.go index 270ed035..8656782a 100644 --- a/internal/controller/claw_plugins_test.go +++ b/internal/controller/claw_plugins_test.go @@ -260,7 +260,7 @@ func TestConfigurePluginsInitContainer(t *testing.T) { ) pluginInit := initContainers[3].(map[string]any) volumeMounts := pluginInit["volumeMounts"].([]any) - require.Len(t, volumeMounts, 5) + require.Len(t, volumeMounts, 3) mountPaths := make(map[string]string) for _, vm := range volumeMounts { @@ -268,9 +268,7 @@ func TestConfigurePluginsInitContainer(t *testing.T) { mountPaths[m["mountPath"].(string)] = m["name"].(string) } - assert.Equal(t, "claw-home", mountPaths["/home/node/.openclaw"]) - assert.Equal(t, "claw-home", mountPaths["/home/node/.local"]) - assert.Equal(t, "claw-home", mountPaths["/home/node/.cache"]) + assert.Equal(t, "claw-home", mountPaths["/home/node"]) assert.Equal(t, "proxy-ca", mountPaths["/etc/proxy-ca"]) assert.Equal(t, "tmp-volume", mountPaths["/tmp"]) }) @@ -639,7 +637,7 @@ func TestPluginsIntegration(t *testing.T) { for _, vm := range ic.VolumeMounts { mountPaths[vm.MountPath] = vm.Name } - assert.Equal(t, "claw-home", mountPaths["/home/node/.openclaw"]) + assert.Equal(t, "claw-home", mountPaths["/home/node"]) assert.Equal(t, "proxy-ca", mountPaths["/etc/proxy-ca"]) assert.Equal(t, "tmp-volume", mountPaths["/tmp"]) break diff --git a/internal/controller/claw_status.go b/internal/controller/claw_status.go index 9a2b15a3..4504756f 100644 --- a/internal/controller/claw_status.go +++ b/internal/controller/claw_status.go @@ -365,9 +365,13 @@ func (r *ClawResourceReconciler) updateStatus(ctx context.Context, instance *cla clawv1alpha1.ConditionTypeVersionDowngrade) } - // Track last deployed version when ready + // Track last deployed version when ready (high-water mark: only update upward + // so that a downgrade preserves the previous version for comparison) if ready && instance.Spec.Version != "" { - instance.Status.LastDeployedVersion = instance.Spec.Version + cmpTrack, cmpTrackOK := compareCalver(instance.Spec.Version, instance.Status.LastDeployedVersion) + if instance.Status.LastDeployedVersion == "" || !cmpTrackOK || cmpTrack >= 0 { + instance.Status.LastDeployedVersion = instance.Spec.Version + } } recordClawMetrics(instance) diff --git a/internal/controller/claw_status_test.go b/internal/controller/claw_status_test.go index 9da5434c..01d16976 100644 --- a/internal/controller/claw_status_test.go +++ b/internal/controller/claw_status_test.go @@ -1320,6 +1320,17 @@ func TestVersionDowngradeDetection(t *testing.T) { assert.Equal(t, metav1.ConditionTrue, condition.Status) assert.Contains(t, condition.Message, "2026.6.5") assert.Contains(t, condition.Message, "2026.6.8") + + // Condition must persist across subsequent reconciles (LastDeployedVersion + // is a high-water mark and should not be overwritten by the downgraded version) + reconcileClaw(t, ctx, reconciler, testInstanceName, namespace) + require.NoError(t, k8sClient.Get(ctx, + client.ObjectKey{Name: testInstanceName, Namespace: namespace}, updated)) + condition = meta.FindStatusCondition(updated.Status.Conditions, + clawv1alpha1.ConditionTypeVersionDowngrade) + require.NotNil(t, condition, "VersionDowngrade condition should persist across reconciles") + assert.Equal(t, "2026.6.8", updated.Status.LastDeployedVersion, + "LastDeployedVersion should not be overwritten by a downgraded version") }) t.Run("should clear VersionDowngrade condition on upgrade", func(t *testing.T) {