From 06fbb0684666f0dc9b7435b6f86f7dd40fdc4834 Mon Sep 17 00:00:00 2001
From: Foreman Bot <chris@mahercode.io>
Date: Sun, 21 Jun 2026 01:05:48 -0700
Subject: [PATCH] fix: preserve agent-written schedulingStatus on
 InferenceService status update

The metal-agent writes status.schedulingStatus and status.schedulingMessage
(e.g. "MemoryCheckFailed", "InsufficientMemory") when admission rejects an
InferenceService. The controller's reconcile loop subsequently updated the
status without preserving those fields, so they read back empty almost
immediately.

The fix removes the else branch in updateStatusWithSchedulingInfo that
cleared SchedulingStatus, SchedulingMessage, and WaitingFor whenever
schedulingInfo was nil. In that case (the common case outside
GPU-scheduling scenarios) the function now leaves all three fields
untouched, so the values already on the status, including agent-written
ones, are preserved.

A regression test verifies that agent-written SchedulingStatus and
SchedulingMessage survive a controller reconcile.

Fixes #643

Signed-off-by: Foreman Bot <chris@mahercode.io>
---
 .../inferenceservice_reconcile_test.go        | 62 +++++++++++++++++++
 internal/controller/status_builder.go         |  7 +--
 2 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/internal/controller/inferenceservice_reconcile_test.go b/internal/controller/inferenceservice_reconcile_test.go
index d411159f..d5ad6705 100644
--- a/internal/controller/inferenceservice_reconcile_test.go
+++ b/internal/controller/inferenceservice_reconcile_test.go
@@ -1036,5 +1036,67 @@ var _ = Describe("Reconcile lifecycle", func() {
 			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ModelCachePVCName, Namespace: "default"}, pvc)).To(Succeed())
 			Expect(pvc.OwnerReferences).To(BeEmpty())
 		})
+
+		It("should preserve agent-written schedulingStatus on status update", func() {
+			modelName := "model-sched-preserve"
+			isvcName := "isvc-sched-preserve"
+
+			model := &inferencev1alpha1.Model{
+				ObjectMeta: metav1.ObjectMeta{Name: modelName, Namespace: "default"},
+				Spec: inferencev1alpha1.ModelSpec{
+					Source:   "https://example.com/model.gguf",
+					Hardware: &inferencev1alpha1.HardwareSpec{Accelerator: "cpu"},
+				},
+			}
+			Expect(k8sClient.Create(ctx, model)).To(Succeed())
+			defer func() { _ = k8sClient.Delete(ctx, model) }()
+
+			model.Status.Phase = PhaseReady
+			Expect(k8sClient.Status().Update(ctx, model)).To(Succeed())
+
+			replicas := int32(1)
+			isvc := &inferencev1alpha1.InferenceService{
+				ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: "default"},
+				Spec: inferencev1alpha1.InferenceServiceSpec{
+					ModelRef: modelName,
+					Replicas: &replicas,
+					Image:    "ghcr.io/ggml-org/llama.cpp:server",
+				},
+			}
+			Expect(k8sClient.Create(ctx, isvc)).To(Succeed())
+			defer func() {
+				_ = k8sClient.Delete(ctx, isvc)
+				dep := &appsv1.Deployment{}
+				if err := k8sClient.Get(ctx, types.NamespacedName{Name: isvcName, Namespace: "default"}, dep); err == nil {
+					_ = k8sClient.Delete(ctx, dep)
+				}
+				svc := &corev1.Service{}
+				if err := k8sClient.Get(ctx, types.NamespacedName{Name: isvcName, Namespace: "default"}, svc); err == nil {
+					_ = k8sClient.Delete(ctx, svc)
+				}
+			}()
+
+			// Simulate the metal-agent writing a scheduling rejection.
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: isvcName, Namespace: "default"}, isvc)).To(Succeed())
+			isvc.Status.SchedulingStatus = "MemoryCheckFailed"
+			isvc.Status.SchedulingMessage = "host memory insufficient for model"
+			Expect(k8sClient.Status().Update(ctx, isvc)).To(Succeed())
+
+			reconciler := &InferenceServiceReconciler{
+				Client:             k8sClient,
+				Scheme:             k8sClient.Scheme(),
+				InitContainerImage: "docker.io/curlimages/curl:8.18.0",
+			}
+			_, err := reconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: types.NamespacedName{Name: isvcName, Namespace: "default"},
+			})
+			Expect(err).NotTo(HaveOccurred())
+
+			updated := &inferencev1alpha1.InferenceService{}
+			Expect(k8sClient.Get(ctx, types.NamespacedName{Name: isvcName, Namespace: "default"}, updated)).To(Succeed())
+			// The controller must not clobber the agent-written scheduling fields.
+			Expect(updated.Status.SchedulingStatus).To(Equal("MemoryCheckFailed"))
+			Expect(updated.Status.SchedulingMessage).To(Equal("host memory insufficient for model"))
+		})
 	})
 })
diff --git a/internal/controller/status_builder.go b/internal/controller/status_builder.go
index 0ce67a83..08d7f77e 100644
--- a/internal/controller/status_builder.go
+++ b/internal/controller/status_builder.go
@@ -135,11 +135,10 @@ func (r *InferenceServiceReconciler) updateStatusWithSchedulingInfo(
 		isvc.Status.SchedulingStatus = schedulingInfo.Status
 		isvc.Status.SchedulingMessage = schedulingInfo.Message
 		isvc.Status.WaitingFor = schedulingInfo.WaitingFor
-	} else {
-		isvc.Status.SchedulingStatus = ""
-		isvc.Status.SchedulingMessage = ""
-		isvc.Status.WaitingFor = ""
 	}
+	// When schedulingInfo is nil, preserve agent-written scheduling fields
+	// (e.g. InsufficientMemory, MemoryCheckFailed) so the controller does not
+	// clobber them on its next status update (#643).
 
 	if phase == PhaseWaitingForGPU {
 		queuePos, err := r.calculateQueuePosition(ctx, isvc)