Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/external/nova/messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ const (
CreateIntent v1alpha1.SchedulingIntent = "create"
// ReserveForFailoverIntent indicates that the request is for failover reservation scheduling.
ReserveForFailoverIntent v1alpha1.SchedulingIntent = "reserve_for_failover"
// ReserveForCommittedResourceIntent indicates that the request is for CR reservation scheduling.
ReserveForCommittedResourceIntent v1alpha1.SchedulingIntent = "reserve_for_committed_resource"
)

// GetIntent analyzes the request spec and determines the intent of the scheduling request.
Expand All @@ -165,6 +167,9 @@ func (req ExternalSchedulerRequest) GetIntent() (v1alpha1.SchedulingIntent, erro
// Used by cortex failover reservation controller
case "reserve_for_failover":
return ReserveForFailoverIntent, nil
// Used by cortex committed resource reservation controller
case "reserve_for_committed_resource":
return ReserveForCommittedResourceIntent, nil
default:
return CreateIntent, nil
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,22 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa
continue // Not handled by us (no resource group set).
}

// For committed resource reservations: unlock resources only if:
// 1. Project ID matches
// 2. ResourceGroup matches the flavor's hw_version
if !s.Options.LockReserved &&
// Check if this is a CR reservation scheduling request.
// If so, we should NOT unlock any CR reservations to prevent overbooking.
// CR capacity should only be unlocked for actual VM scheduling.
intent, err := request.GetIntent()
switch {
case err == nil && intent == api.ReserveForCommittedResourceIntent:
traceLog.Debug("keeping CR reservation locked for CR reservation scheduling",
"reservation", reservation.Name,
"intent", intent)
// Don't continue - fall through to block the resources
case !s.Options.LockReserved &&
// For committed resource reservations: unlock resources only if:
// 1. Project ID matches
// 2. ResourceGroup matches the flavor's hw_version
reservation.Spec.CommittedResourceReservation.ProjectID == request.Spec.Data.ProjectID &&
reservation.Spec.CommittedResourceReservation.ResourceGroup == request.Spec.Data.Flavor.Data.ExtraSpecs["hw_version"] {
reservation.Spec.CommittedResourceReservation.ResourceGroup == request.Spec.Data.Flavor.Data.ExtraSpecs["hw_version"]:
traceLog.Info("unlocking resources reserved by matching committed resource reservation with allocation",
"reservation", reservation.Name,
"instanceUUID", request.Spec.Data.InstanceUUID,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,12 @@ func parseMemoryToMB(memory string) uint64 {
}

func newNovaRequest(instanceUUID, projectID, flavorName, flavorGroup string, vcpus int, memory string, evacuation bool, hosts []string) api.ExternalSchedulerRequest { //nolint:unparam // vcpus varies in real usage
return newNovaRequestWithIntent(instanceUUID, projectID, flavorName, flavorGroup, vcpus, memory, "", evacuation, hosts)
}

// newNovaRequestWithIntent creates a nova request with a specific intent.
// intentHint can be: "evacuate", "reserve_for_committed_resource", "reserve_for_failover", or "" for create.
func newNovaRequestWithIntent(instanceUUID, projectID, flavorName, flavorGroup string, vcpus int, memory, intentHint string, evacuation bool, hosts []string) api.ExternalSchedulerRequest {
hostList := make([]api.ExternalSchedulerHost, len(hosts))
for i, h := range hosts {
hostList[i] = api.ExternalSchedulerHost{ComputeHost: h}
Expand All @@ -225,6 +231,10 @@ func newNovaRequest(instanceUUID, projectID, flavorName, flavorGroup string, vcp
schedulerHints = map[string]any{
"_nova_check_type": []any{"evacuate"},
}
} else if intentHint != "" {
schedulerHints = map[string]any{
"_nova_check_type": intentHint,
}
}

memoryMB := parseMemoryToMB(memory)
Expand Down Expand Up @@ -802,6 +812,180 @@ func TestFilterHasEnoughCapacity_IgnoredReservationTypes(t *testing.T) {
}
}

func TestFilterHasEnoughCapacity_ReserveForCommittedResourceIntent(t *testing.T) {
scheme := buildTestScheme(t)

// Test that when scheduling a CR reservation (with reserve_for_committed_resource intent),
// other CR reservations from the same project+flavor group are NOT unlocked.
// This prevents overbooking when scheduling multiple CR reservations.
tests := []struct {
name string
hypervisors []*hv1.Hypervisor
reservations []*v1alpha1.Reservation
request api.ExternalSchedulerRequest
opts FilterHasEnoughCapacityOpts
expectedHosts []string
filteredHosts []string
}{
{
name: "CR reservation scheduling: same project+flavor reservations stay locked (prevents overbooking)",
hypervisors: []*hv1.Hypervisor{
newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // 8 CPU free
newHypervisor("host2", "16", "8", "32Gi", "16Gi"), // 8 CPU free
},
reservations: []*v1alpha1.Reservation{
// Existing CR reservation on host1 for same project+flavor group
newCommittedReservation("existing-cr", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
},
// Request with reserve_for_committed_resource intent (scheduling a new CR reservation)
request: newNovaRequestWithIntent("new-reservation-uuid", "project-A", "m1.large", "gp-1", 4, "8Gi", "reserve_for_committed_resource", false, []string{"host1", "host2"}),
opts: FilterHasEnoughCapacityOpts{LockReserved: false}, // Note: LockReserved is false, but intent overrides
expectedHosts: []string{"host2"}, // host1 blocked because existing-cr stays locked
filteredHosts: []string{"host1"},
},
{
name: "Normal VM scheduling: same project+flavor reservations ARE unlocked",
hypervisors: []*hv1.Hypervisor{
newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // 8 CPU free
newHypervisor("host2", "16", "8", "32Gi", "16Gi"), // 8 CPU free
},
reservations: []*v1alpha1.Reservation{
// Existing CR reservation on host1 for same project+flavor group
newCommittedReservation("existing-cr", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
},
// Normal VM create request (no special intent) - CR reservation should be unlocked
request: newNovaRequest("vm-instance-123", "project-A", "m1.large", "gp-1", 4, "8Gi", false, []string{"host1", "host2"}),
opts: FilterHasEnoughCapacityOpts{LockReserved: false},
expectedHosts: []string{"host1", "host2"}, // host1 passes because existing-cr is unlocked for matching project+flavor
filteredHosts: []string{},
},
{
name: "CR reservation scheduling: different project reservations stay locked (as expected)",
hypervisors: []*hv1.Hypervisor{
newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // 8 CPU free
newHypervisor("host2", "16", "8", "32Gi", "16Gi"), // 8 CPU free
},
reservations: []*v1alpha1.Reservation{
// Existing CR reservation on host1 for different project
newCommittedReservation("other-project-cr", "host1", "host1", "project-B", "m1.large", "gp-1", "8", "16Gi", nil, nil),
},
// Request with reserve_for_committed_resource intent
request: newNovaRequestWithIntent("new-reservation-uuid", "project-A", "m1.large", "gp-1", 4, "8Gi", "reserve_for_committed_resource", false, []string{"host1", "host2"}),
opts: FilterHasEnoughCapacityOpts{LockReserved: false},
expectedHosts: []string{"host2"},
filteredHosts: []string{"host1"}, // host1 blocked by other project's reservation (would be blocked anyway)
},
{
name: "CR reservation scheduling: multiple reservations - none unlocked",
hypervisors: []*hv1.Hypervisor{
newHypervisor("host1", "32", "0", "64Gi", "0"), // 32 CPU free
},
reservations: []*v1alpha1.Reservation{
// Three existing CR reservations on host1 for same project+flavor group
newCommittedReservation("cr-1", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
newCommittedReservation("cr-2", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
newCommittedReservation("cr-3", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
},
// Request with reserve_for_committed_resource intent, needs 10 CPU
// After blocking all 3 reservations (24 CPU), only 8 CPU free -> should fail
request: newNovaRequestWithIntent("new-reservation-uuid", "project-A", "m1.large", "gp-1", 10, "20Gi", "reserve_for_committed_resource", false, []string{"host1"}),
opts: FilterHasEnoughCapacityOpts{LockReserved: false},
expectedHosts: []string{},
filteredHosts: []string{"host1"}, // All reservations stay locked, not enough capacity
},
{
name: "Normal VM scheduling: multiple reservations - all unlocked for same project+flavor",
hypervisors: []*hv1.Hypervisor{
newHypervisor("host1", "32", "0", "64Gi", "0"), // 32 CPU free
},
reservations: []*v1alpha1.Reservation{
// Three existing CR reservations on host1 for same project+flavor group
newCommittedReservation("cr-1", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
newCommittedReservation("cr-2", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
newCommittedReservation("cr-3", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
},
// Normal VM create request, needs 10 CPU
// All 3 reservations unlocked for matching project+flavor -> 32 CPU free -> should pass
request: newNovaRequest("vm-instance-123", "project-A", "m1.large", "gp-1", 10, "20Gi", false, []string{"host1"}),
opts: FilterHasEnoughCapacityOpts{LockReserved: false},
expectedHosts: []string{"host1"}, // All reservations unlocked, enough capacity
filteredHosts: []string{},
},
{
name: "CR reservation scheduling: IgnoredReservationTypes config DOES bypass intent protection (safety override)",
hypervisors: []*hv1.Hypervisor{
newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // 8 CPU free
},
reservations: []*v1alpha1.Reservation{
// Existing CR reservation on host1 blocks all 8 free CPU
newCommittedReservation("existing-cr", "host1", "host1", "project-A", "m1.large", "gp-1", "8", "16Gi", nil, nil),
},
// Request with reserve_for_committed_resource intent
// IgnoredReservationTypes is a safety flag that overrides everything, including intent
request: newNovaRequestWithIntent("new-reservation-uuid", "project-A", "m1.large", "gp-1", 4, "8Gi", "reserve_for_committed_resource", false, []string{"host1"}),
opts: FilterHasEnoughCapacityOpts{
LockReserved: false,
// IgnoredReservationTypes is a safety override - ignores CR even for CR scheduling
IgnoredReservationTypes: []v1alpha1.ReservationType{v1alpha1.ReservationTypeCommittedResource},
},
expectedHosts: []string{"host1"}, // CR reservation is ignored via IgnoredReservationTypes (safety override)
filteredHosts: []string{},
},
{
name: "Normal VM scheduling: IgnoredReservationTypes config DOES work for normal VMs",
hypervisors: []*hv1.Hypervisor{
newHypervisor("host1", "16", "8", "32Gi", "16Gi"), // 8 CPU free
},
reservations: []*v1alpha1.Reservation{
// Existing CR reservation on host1 blocks all 8 free CPU
newCommittedReservation("existing-cr", "host1", "host1", "project-B", "m1.large", "gp-1", "8", "16Gi", nil, nil),
},
// Normal VM create request (different project, so unlocking via project match won't work)
// But IgnoredReservationTypes should make it work
request: newNovaRequest("vm-instance-123", "project-A", "m1.large", "gp-1", 4, "8Gi", false, []string{"host1"}),
opts: FilterHasEnoughCapacityOpts{
LockReserved: false,
IgnoredReservationTypes: []v1alpha1.ReservationType{v1alpha1.ReservationTypeCommittedResource},
},
expectedHosts: []string{"host1"}, // CR reservation ignored via IgnoredReservationTypes
filteredHosts: []string{},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
objects := make([]client.Object, 0, len(tt.hypervisors)+len(tt.reservations))
for _, h := range tt.hypervisors {
objects = append(objects, h.DeepCopy())
}
for _, r := range tt.reservations {
objects = append(objects, r.DeepCopy())
}

step := &FilterHasEnoughCapacity{}
step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build()
step.Options = tt.opts

result, err := step.Run(slog.Default(), tt.request)
if err != nil {
t.Fatalf("expected no error, got %v", err)
}

for _, host := range tt.expectedHosts {
if _, ok := result.Activations[host]; !ok {
t.Errorf("expected host %s to be present in activations, but got %+v", host, result.Activations)
}
}

for _, host := range tt.filteredHosts {
if _, ok := result.Activations[host]; ok {
t.Errorf("expected host %s to be filtered out, but it was present", host)
}
}
})
}
}

func TestFilterHasEnoughCapacity_NilEffectiveCapacityFallback(t *testing.T) {
scheme := buildTestScheme(t)

Expand Down
5 changes: 5 additions & 0 deletions internal/scheduling/reservations/commitments/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,11 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr
EligibleHosts: eligibleHosts,
Pipeline: pipelineName,
AvailabilityZone: availabilityZone,
// Set hint to indicate this is a CR reservation scheduling request.
// This prevents other CR reservations from being unlocked during capacity filtering.
SchedulerHints: map[string]any{
"_nova_check_type": string(schedulerdelegationapi.ReserveForCommittedResourceIntent),
},
}

scheduleResp, err := r.SchedulerClient.ScheduleReservation(ctx, scheduleReq)
Expand Down
Loading