diff --git a/internal/controller/inferenceservice_reconcile_test.go b/internal/controller/inferenceservice_reconcile_test.go index d411159..d5ad670 100644 --- a/internal/controller/inferenceservice_reconcile_test.go +++ b/internal/controller/inferenceservice_reconcile_test.go @@ -1036,5 +1036,67 @@ var _ = Describe("Reconcile lifecycle", func() { Expect(k8sClient.Get(ctx, types.NamespacedName{Name: ModelCachePVCName, Namespace: "default"}, pvc)).To(Succeed()) Expect(pvc.OwnerReferences).To(BeEmpty()) }) + + It("should preserve agent-written schedulingStatus on status update", func() { + modelName := "model-sched-preserve" + isvcName := "isvc-sched-preserve" + + model := &inferencev1alpha1.Model{ + ObjectMeta: metav1.ObjectMeta{Name: modelName, Namespace: "default"}, + Spec: inferencev1alpha1.ModelSpec{ + Source: "https://example.com/model.gguf", + Hardware: &inferencev1alpha1.HardwareSpec{Accelerator: "cpu"}, + }, + } + Expect(k8sClient.Create(ctx, model)).To(Succeed()) + defer func() { _ = k8sClient.Delete(ctx, model) }() + + model.Status.Phase = PhaseReady + Expect(k8sClient.Status().Update(ctx, model)).To(Succeed()) + + replicas := int32(1) + isvc := &inferencev1alpha1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{Name: isvcName, Namespace: "default"}, + Spec: inferencev1alpha1.InferenceServiceSpec{ + ModelRef: modelName, + Replicas: &replicas, + Image: "ghcr.io/ggml-org/llama.cpp:server", + }, + } + Expect(k8sClient.Create(ctx, isvc)).To(Succeed()) + defer func() { + _ = k8sClient.Delete(ctx, isvc) + dep := &appsv1.Deployment{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: isvcName, Namespace: "default"}, dep); err == nil { + _ = k8sClient.Delete(ctx, dep) + } + svc := &corev1.Service{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: isvcName, Namespace: "default"}, svc); err == nil { + _ = k8sClient.Delete(ctx, svc) + } + }() + + // Simulate the metal-agent writing a scheduling rejection. + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: isvcName, Namespace: "default"}, isvc)).To(Succeed()) + isvc.Status.SchedulingStatus = "MemoryCheckFailed" + isvc.Status.SchedulingMessage = "host memory insufficient for model" + Expect(k8sClient.Status().Update(ctx, isvc)).To(Succeed()) + + reconciler := &InferenceServiceReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + InitContainerImage: "docker.io/curlimages/curl:8.18.0", + } + _, err := reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: types.NamespacedName{Name: isvcName, Namespace: "default"}, + }) + Expect(err).NotTo(HaveOccurred()) + + updated := &inferencev1alpha1.InferenceService{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: isvcName, Namespace: "default"}, updated)).To(Succeed()) + // The controller must not clobber the agent-written scheduling fields. + Expect(updated.Status.SchedulingStatus).To(Equal("MemoryCheckFailed")) + Expect(updated.Status.SchedulingMessage).To(Equal("host memory insufficient for model")) + }) }) }) diff --git a/internal/controller/status_builder.go b/internal/controller/status_builder.go index 0ce67a8..08d7f77 100644 --- a/internal/controller/status_builder.go +++ b/internal/controller/status_builder.go @@ -135,11 +135,10 @@ func (r *InferenceServiceReconciler) updateStatusWithSchedulingInfo( isvc.Status.SchedulingStatus = schedulingInfo.Status isvc.Status.SchedulingMessage = schedulingInfo.Message isvc.Status.WaitingFor = schedulingInfo.WaitingFor - } else { - isvc.Status.SchedulingStatus = "" - isvc.Status.SchedulingMessage = "" - isvc.Status.WaitingFor = "" } + // When schedulingInfo is nil, preserve agent-written scheduling fields + // (e.g. InsufficientMemory, MemoryCheckFailed) so the controller does not + // clobber them on its next status update (#643). if phase == PhaseWaitingForGPU { queuePos, err := r.calculateQueuePosition(ctx, isvc)