diff --git a/api/external/nova/messages.go b/api/external/nova/messages.go
index a0a5a8616..c7b52451e 100644
--- a/api/external/nova/messages.go
+++ b/api/external/nova/messages.go
@@ -140,6 +140,8 @@ const (
 	CreateIntent v1alpha1.SchedulingIntent = "create"
 	// ReserveForFailoverIntent indicates that the request is for failover reservation scheduling.
 	ReserveForFailoverIntent v1alpha1.SchedulingIntent = "reserve_for_failover"
+	// ReserveForCommittedResourceIntent indicates that the request is for CR reservation scheduling.
+	ReserveForCommittedResourceIntent v1alpha1.SchedulingIntent = "reserve_for_committed_resource"
 )
 
 // GetIntent analyzes the request spec and determines the intent of the scheduling request.
@@ -165,6 +167,9 @@ func (req ExternalSchedulerRequest) GetIntent() (v1alpha1.SchedulingIntent, erro
 	// Used by cortex failover reservation controller
 	case "reserve_for_failover":
 		return ReserveForFailoverIntent, nil
+	// Used by cortex committed resource reservation controller
+	case "reserve_for_committed_resource":
+		return ReserveForCommittedResourceIntent, nil
 	default:
 		return CreateIntent, nil
 	}
diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
index ea37e8c11..d26c7c940 100644
--- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
+++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
@@ -110,12 +110,22 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa
 			continue // Not handled by us (no resource group set).
 		}
 
-		// For committed resource reservations: unlock resources only if:
-		// 1. Project ID matches
-		// 2. ResourceGroup matches the flavor's hw_version
-		if !s.Options.LockReserved &&
+		// Check if this is a CR reservation scheduling request.
+		// If so, we should NOT unlock any CR reservations to prevent overbooking.
+		// CR capacity should only be unlocked for actual VM scheduling.
+		intent, err := request.GetIntent()
+		switch {
+		case err == nil && intent == api.ReserveForCommittedResourceIntent:
+			traceLog.Debug("keeping CR reservation locked for CR reservation scheduling",
+				"reservation", reservation.Name,
+				"intent", intent)
+			// No continue here: execution proceeds past the switch, so the resources stay blocked.
+		case !s.Options.LockReserved &&
+			// For committed resource reservations: unlock resources only if:
+			// 1. Project ID matches
+			// 2. ResourceGroup matches the flavor's hw_version
 			reservation.Spec.CommittedResourceReservation.ProjectID == request.Spec.Data.ProjectID &&
-			reservation.Spec.CommittedResourceReservation.ResourceGroup == request.Spec.Data.Flavor.Data.ExtraSpecs["hw_version"] {
+			reservation.Spec.CommittedResourceReservation.ResourceGroup == request.Spec.Data.Flavor.Data.ExtraSpecs["hw_version"]:
 			traceLog.Info("unlocking resources reserved by matching committed resource reservation with allocation",
 				"reservation", reservation.Name,
 				"instanceUUID", request.Spec.Data.InstanceUUID,
diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go
index 002a92dc8..cabc3a3b4 100644
--- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go
+++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go
@@ -210,6 +210,12 @@ func parseMemoryToMB(memory string) uint64 {
 }
 
 func newNovaRequest(instanceUUID, projectID, flavorName, flavorGroup string, vcpus int, memory string, evacuation bool, hosts []string) api.ExternalSchedulerRequest { //nolint:unparam // vcpus varies in real usage
+	return newNovaRequestWithIntent(instanceUUID, projectID, flavorName, flavorGroup, vcpus, memory, "", evacuation, hosts)
+}
+
+// newNovaRequestWithIntent creates a nova request with a specific intent.
+// intentHint can be: "evacuate", "reserve_for_committed_resource", "reserve_for_failover", or "" for create.
+func newNovaRequestWithIntent(instanceUUID, projectID, flavorName, flavorGroup string, vcpus int, memory, intentHint string, evacuation bool, hosts []string) api.ExternalSchedulerRequest {
 	hostList := make([]api.ExternalSchedulerHost, len(hosts))
 	for i, h := range hosts {
 		hostList[i] = api.ExternalSchedulerHost{ComputeHost: h}
@@ -225,6 +231,10 @@ func newNovaRequest(instanceUUID, projectID, flavorName, flavorGroup string, vcp
 		schedulerHints = map[string]any{
 			"_nova_check_type": []any{"evacuate"},
 		}
+	} else if intentHint != "" {
+		schedulerHints = map[string]any{
+			"_nova_check_type": intentHint,
+		}
 	}
 
 	memoryMB := parseMemoryToMB(memory)
@@ -802,6 +812,180 @@ func TestFilterHasEnoughCapacity_IgnoredReservationTypes(t *testing.T) {
 }
 
+func TestFilterHasEnoughCapacity_ReserveForCommittedResourceIntent(t *testing.T) {
+	scheme := buildTestScheme(t)
+
+	// Test that when scheduling a CR reservation (with reserve_for_committed_resource intent),
+	// other CR reservations from the same project+flavor group are NOT unlocked.
+	// This prevents overbooking when scheduling multiple CR reservations.
+	tests := []struct {
+		name          string
+		hypervisors   []*hv1.Hypervisor
+		reservations  []*v1alpha1.Reservation
+		request       api.ExternalSchedulerRequest
+		opts          FilterHasEnoughCapacityOpts
+		expectedHosts []string
+		filteredHosts []string
+	}{
+		{
+			name: "CR reservation scheduling: same project+flavor reservations stay locked (prevents overbooking)",
+			hypervisors: []*hv1.Hypervisor{
+				newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // 8 CPU free
+				newHypervisor("host2", "16", "8", "32Gi", "16Gi"), // 8 CPU free
+			},
+			reservations: []*v1alpha1.Reservation{
+				// Existing CR reservation on host1 for same project+flavor group
+				newCommittedReservation("existing-cr", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+			},
+			// Request with reserve_for_committed_resource intent (scheduling a new CR reservation)
+			request:       newNovaRequestWithIntent("new-reservation-uuid", "project-A", "m1.large", "gp-1", 4, "8Gi", "reserve_for_committed_resource", false, []string{"host1", "host2"}),
+			opts:          FilterHasEnoughCapacityOpts{LockReserved: false}, // Note: LockReserved is false, but intent overrides
+			expectedHosts: []string{"host2"},                                // host1 blocked because existing-cr stays locked
+			filteredHosts: []string{"host1"},
+		},
+		{
+			name: "Normal VM scheduling: same project+flavor reservations ARE unlocked",
+			hypervisors: []*hv1.Hypervisor{
+				newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // 8 CPU free
+				newHypervisor("host2", "16", "8", "32Gi", "16Gi"), // 8 CPU free
+			},
+			reservations: []*v1alpha1.Reservation{
+				// Existing CR reservation on host1 for same project+flavor group
+				newCommittedReservation("existing-cr", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+			},
+			// Normal VM create request (no special intent) - CR reservation should be unlocked
+			request:       newNovaRequest("vm-instance-123", "project-A", "m1.large", "gp-1", 4, "8Gi", false, []string{"host1", "host2"}),
+			opts:          FilterHasEnoughCapacityOpts{LockReserved: false},
+			expectedHosts: []string{"host1", "host2"}, // host1 passes because existing-cr is unlocked for matching project+flavor
+			filteredHosts: []string{},
+		},
+		{
+			name: "CR reservation scheduling: different project reservations stay locked (as expected)",
+			hypervisors: []*hv1.Hypervisor{
+				newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // 8 CPU free
+				newHypervisor("host2", "16", "8", "32Gi", "16Gi"), // 8 CPU free
+			},
+			reservations: []*v1alpha1.Reservation{
+				// Existing CR reservation on host1 for different project
+				newCommittedReservation("other-project-cr", "host1", "host1", "project-B", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+			},
+			// Request with reserve_for_committed_resource intent
+			request:       newNovaRequestWithIntent("new-reservation-uuid", "project-A", "m1.large", "gp-1", 4, "8Gi", "reserve_for_committed_resource", false, []string{"host1", "host2"}),
+			opts:          FilterHasEnoughCapacityOpts{LockReserved: false},
+			expectedHosts: []string{"host2"},
+			filteredHosts: []string{"host1"}, // host1 blocked by other project's reservation (would be blocked anyway)
+		},
+		{
+			name: "CR reservation scheduling: multiple reservations - none unlocked",
+			hypervisors: []*hv1.Hypervisor{
+				newHypervisor("host1", "32", "0", "64Gi", "0"), // 32 CPU free
+			},
+			reservations: []*v1alpha1.Reservation{
+				// Three existing CR reservations on host1 for same project+flavor group
+				newCommittedReservation("cr-1", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+				newCommittedReservation("cr-2", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+				newCommittedReservation("cr-3", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+			},
+			// Request with reserve_for_committed_resource intent, needs 10 CPU
+			// After blocking all 3 reservations (24 CPU), only 8 CPU free -> should fail
+			request:       newNovaRequestWithIntent("new-reservation-uuid", "project-A", "m1.large", "gp-1", 10, "20Gi", "reserve_for_committed_resource", false, []string{"host1"}),
+			opts:          FilterHasEnoughCapacityOpts{LockReserved: false},
+			expectedHosts: []string{},
+			filteredHosts: []string{"host1"}, // All reservations stay locked, not enough capacity
+		},
+		{
+			name: "Normal VM scheduling: multiple reservations - all unlocked for same project+flavor",
+			hypervisors: []*hv1.Hypervisor{
+				newHypervisor("host1", "32", "0", "64Gi", "0"), // 32 CPU free
+			},
+			reservations: []*v1alpha1.Reservation{
+				// Three existing CR reservations on host1 for same project+flavor group
+				newCommittedReservation("cr-1", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+				newCommittedReservation("cr-2", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+				newCommittedReservation("cr-3", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+			},
+			// Normal VM create request, needs 10 CPU
+			// All 3 reservations unlocked for matching project+flavor -> 32 CPU free -> should pass
+			request:       newNovaRequest("vm-instance-123", "project-A", "m1.large", "gp-1", 10, "20Gi", false, []string{"host1"}),
+			opts:          FilterHasEnoughCapacityOpts{LockReserved: false},
+			expectedHosts: []string{"host1"}, // All reservations unlocked, enough capacity
+			filteredHosts: []string{},
+		},
+		{
+			name: "CR reservation scheduling: IgnoredReservationTypes config DOES bypass intent protection (safety override)",
+			hypervisors: []*hv1.Hypervisor{
+				newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // 8 CPU free
+			},
+			reservations: []*v1alpha1.Reservation{
+				// Existing CR reservation on host1 blocks all 8 free CPU
+				newCommittedReservation("existing-cr", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+			},
+			// Request with reserve_for_committed_resource intent
+			// IgnoredReservationTypes is a safety flag that overrides everything, including intent
+			request: newNovaRequestWithIntent("new-reservation-uuid", "project-A", "m1.large", "gp-1", 4, "8Gi", "reserve_for_committed_resource", false, []string{"host1"}),
+			opts: FilterHasEnoughCapacityOpts{
+				LockReserved: false,
+				// IgnoredReservationTypes is a safety override - ignores CR even for CR scheduling
+				IgnoredReservationTypes: []v1alpha1.ReservationType{v1alpha1.ReservationTypeCommittedResource},
+			},
+			expectedHosts: []string{"host1"}, // CR reservation is ignored via IgnoredReservationTypes (safety override)
+			filteredHosts: []string{},
+		},
+		{
+			name: "Normal VM scheduling: IgnoredReservationTypes config DOES work for normal VMs",
+			hypervisors: []*hv1.Hypervisor{
+				newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // 8 CPU free
+			},
+			reservations: []*v1alpha1.Reservation{
+				// Existing CR reservation on host1 blocks all 8 free CPU
+				newCommittedReservation("existing-cr", "host1", "host1", "project-B", "m1.large", "gp-1", "8", "16Gi", nil, nil),
+			},
+			// Normal VM create request (different project, so unlocking via project match won't work)
+			// But IgnoredReservationTypes should make it work
+			request: newNovaRequest("vm-instance-123", "project-A", "m1.large", "gp-1", 4, "8Gi", false, []string{"host1"}),
+			opts: FilterHasEnoughCapacityOpts{
+				LockReserved:            false,
+				IgnoredReservationTypes: []v1alpha1.ReservationType{v1alpha1.ReservationTypeCommittedResource},
+			},
+			expectedHosts: []string{"host1"}, // CR reservation ignored via IgnoredReservationTypes
+			filteredHosts: []string{},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			objects := make([]client.Object, 0, len(tt.hypervisors)+len(tt.reservations))
+			for _, h := range tt.hypervisors {
+				objects = append(objects, h.DeepCopy())
+			}
+			for _, r := range tt.reservations {
+				objects = append(objects, r.DeepCopy())
+			}
+
+			step := &FilterHasEnoughCapacity{}
+			step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build()
+			step.Options = tt.opts
+
+			result, err := step.Run(slog.Default(), tt.request)
+			if err != nil {
+				t.Fatalf("expected no error, got %v", err)
+			}
+
+			for _, host := range tt.expectedHosts {
+				if _, ok := result.Activations[host]; !ok {
+					t.Errorf("expected host %s to be present in activations, but got %+v", host, result.Activations)
+				}
+			}
+
+			for _, host := range tt.filteredHosts {
+				if _, ok := result.Activations[host]; ok {
+					t.Errorf("expected host %s to be filtered out, but it was present", host)
+				}
+			}
+		})
+	}
+}
+
 func TestFilterHasEnoughCapacity_NilEffectiveCapacityFallback(t *testing.T) {
 	scheme := buildTestScheme(t)
 
diff --git a/internal/scheduling/reservations/commitments/controller.go b/internal/scheduling/reservations/commitments/controller.go
index 4a318d882..9c238aeee 100644
--- a/internal/scheduling/reservations/commitments/controller.go
+++ b/internal/scheduling/reservations/commitments/controller.go
@@ -264,6 +264,11 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
 		EligibleHosts:    eligibleHosts,
 		Pipeline:         pipelineName,
 		AvailabilityZone: availabilityZone,
+		// Set hint to indicate this is a CR reservation scheduling request.
+		// This prevents other CR reservations from being unlocked during capacity filtering.
+		SchedulerHints: map[string]any{
+			"_nova_check_type": string(schedulerdelegationapi.ReserveForCommittedResourceIntent),
+		},
 	}
 
 	scheduleResp, err := r.SchedulerClient.ScheduleReservation(ctx, scheduleReq)