From ea5079a3dd89293f2139c2c6fe1fc820210f689a Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Sun, 22 Mar 2026 14:58:58 +0100 Subject: [PATCH 1/5] Hack to shuffel host in evacuation request --- .../scheduling/nova/external_scheduler_api.go | 31 +++++ .../nova/external_scheduler_api_test.go | 111 ++++++++++++++++++ internal/scheduling/nova/integration_test.go | 3 +- 3 files changed, 144 insertions(+), 1 deletion(-) diff --git a/internal/scheduling/nova/external_scheduler_api.go b/internal/scheduling/nova/external_scheduler_api.go index c3a5de071..2a258297c 100644 --- a/internal/scheduling/nova/external_scheduler_api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -11,6 +11,7 @@ import ( "fmt" "io" "log/slog" + "math/rand" "net/http" "slices" @@ -29,6 +30,8 @@ import ( type HTTPAPIConfig struct { // OpenStack projects that use experimental features. ExperimentalProjectIDs []string `json:"experimentalProjectIDs,omitempty"` + // Number of top hosts to shuffle for evacuation requests. Defaults to 3. + EvacuationShuffleK int `json:"evacuationShuffleK,omitempty"` } type HTTPAPIDelegate interface { @@ -137,6 +140,30 @@ func (httpAPI *httpAPI) inferPipelineName(requestData api.ExternalSchedulerReque } } +// shuffleTopHostsForEvacuation randomly reorders the first k hosts if the request +// is an evacuation. This helps distribute evacuated VMs across multiple hosts +// rather than concentrating them on the single "best" host. +func shuffleTopHostsForEvacuation(request api.ExternalSchedulerRequest, hosts []string, k int) []string { + intent, err := request.GetIntent() + if err != nil || intent != api.EvacuateIntent { + return hosts + } + if k <= 0 { + k = 3 + } + n := min(k, len(hosts)) + if n <= 1 { + return hosts + } + result := make([]string, len(hosts)) + copy(result, hosts) + rand.Shuffle(n, func(i, j int) { + result[i], result[j] = result[j], result[i] + }) + slog.Info("shuffled top hosts for evacuation", "k", n, "hosts", result[:n]) + return result +} + // Limit the external scheduler response to the hosts provided in the external // scheduler request. i.e. don't provide new hosts that weren't in the request, // since the Nova scheduler won't know how to handle them. @@ -251,6 +278,10 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req } hosts := decision.Status.Result.OrderedHosts hosts = limitHostsToRequest(requestData, hosts) + + // This is a hack to address the problem that Nova only uses the first host in hosts for evacuation requests. + // Only for evacuation we shuffle the first k hosts to ensure that we do not get stuck on a single host + hosts = shuffleTopHostsForEvacuation(requestData, hosts, httpAPI.config.EvacuationShuffleK) response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") if err = json.NewEncoder(w).Encode(response); err != nil { diff --git a/internal/scheduling/nova/external_scheduler_api_test.go b/internal/scheduling/nova/external_scheduler_api_test.go index 78b2a84b1..bcf4e0c6b 100644 --- a/internal/scheduling/nova/external_scheduler_api_test.go +++ b/internal/scheduling/nova/external_scheduler_api_test.go @@ -511,6 +511,117 @@ func TestLimitHostsToRequest(t *testing.T) { } } +func TestShuffleTopHostsForEvacuation(t *testing.T) { + evacuateRequest := novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + SchedulerHints: map[string]any{"_nova_check_type": "evacuate"}, + }, + }, + } + createRequest := novaapi.ExternalSchedulerRequest{ + Spec: novaapi.NovaObject[novaapi.NovaSpec]{ + Data: novaapi.NovaSpec{ + SchedulerHints: map[string]any{"_nova_check_type": "create"}, + }, + }, + } + + tests := []struct { + name string + request novaapi.ExternalSchedulerRequest + hosts []string + k int + unchangedTailFrom int // index from which hosts should be unchanged (-1 if all can change) + }{ + { + name: "non-evacuation request returns hosts unchanged", + request: createRequest, + hosts: []string{"host1", "host2", "host3"}, + k: 3, + unchangedTailFrom: 0, + }, + { + name: "evacuation with empty hosts returns empty", + request: evacuateRequest, + hosts: []string{}, + k: 3, + }, + { + name: "evacuation with single host returns unchanged", + request: evacuateRequest, + hosts: []string{"host1"}, + k: 3, + unchangedTailFrom: 0, + }, + { + name: "evacuation shuffles only first k hosts", + request: evacuateRequest, + hosts: []string{"host1", "host2", "host3", "host4", "host5"}, + k: 3, + unchangedTailFrom: 3, + }, + { + name: "evacuation with k=0 uses default of 3", + request: evacuateRequest, + hosts: []string{"host1", "host2", "host3", "host4", "host5"}, + k: 0, + unchangedTailFrom: 3, + }, + { + name: "evacuation with negative k uses default of 3", + request: evacuateRequest, + hosts: []string{"host1", "host2", "host3", "host4", "host5"}, + k: -1, + unchangedTailFrom: 3, + }, + { + name: "evacuation with k larger than hosts shuffles all", + request: evacuateRequest, + hosts: []string{"host1", "host2"}, + k: 10, + unchangedTailFrom: -1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + original := make([]string, len(tt.hosts)) + copy(original, tt.hosts) + + result := shuffleTopHostsForEvacuation(tt.request, tt.hosts, tt.k) + + if len(result) != len(tt.hosts) { + t.Fatalf("expected %d hosts, got %d", len(tt.hosts), len(result)) + } + // Verify original slice not modified + for i, h := range original { + if tt.hosts[i] != h { + t.Errorf("original slice modified at %d: expected %s, got %s", i, h, tt.hosts[i]) + } + } + // Verify tail unchanged + if tt.unchangedTailFrom >= 0 { + for i := tt.unchangedTailFrom; i < len(original); i++ { + if result[i] != original[i] { + t.Errorf("expected host[%d] = %s unchanged, got %s", i, original[i], result[i]) + } + } + } + // Verify all hosts present + hostSet := make(map[string]bool) + for _, h := range result { + hostSet[h] = true + } + for _, h := range original { + if !hostSet[h] { + t.Errorf("host %s missing from result", h) + } + } + }) + } +} + func TestHTTPAPI_inferPipelineName(t *testing.T) { delegate := &mockHTTPAPIDelegate{} config := HTTPAPIConfig{ diff --git a/internal/scheduling/nova/integration_test.go b/internal/scheduling/nova/integration_test.go index a1267c9c0..8cbae0176 100644 --- a/internal/scheduling/nova/integration_test.go +++ b/internal/scheduling/nova/integration_test.go @@ -283,8 +283,9 @@ func NewIntegrationTestServer(t *testing.T, pipelineConfig PipelineConfig, objec controller.PipelineConfigs[testPipeline.Name] = testPipeline // Create the HTTP API with the controller as delegate - skip metrics registration + // Set EvacuationShuffleK=1 to disable shuffle for deterministic test results api := &httpAPI{ - config: HTTPAPIConfig{}, + config: HTTPAPIConfig{EvacuationShuffleK: 1}, monitor: lib.NewSchedulerMonitor(), // Create new monitor but don't register delegate: controller, } From e0079955582f53def6ab7ff83ae879113940f107 Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Mon, 23 Mar 2026 09:35:57 +0100 Subject: [PATCH 2/5] pr feedback --- .../scheduling/nova/external_scheduler_api.go | 13 +++-- .../nova/external_scheduler_api_test.go | 49 ++++--------------- 2 files changed, 16 insertions(+), 46 deletions(-) diff --git a/internal/scheduling/nova/external_scheduler_api.go b/internal/scheduling/nova/external_scheduler_api.go index 2a258297c..7fff52d94 100644 --- a/internal/scheduling/nova/external_scheduler_api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -140,14 +140,10 @@ func (httpAPI *httpAPI) inferPipelineName(requestData api.ExternalSchedulerReque } } -// shuffleTopHostsForEvacuation randomly reorders the first k hosts if the request +// shuffleTopHosts randomly reorders the first k hosts if the request // is an evacuation. This helps distribute evacuated VMs across multiple hosts // rather than concentrating them on the single "best" host. -func shuffleTopHostsForEvacuation(request api.ExternalSchedulerRequest, hosts []string, k int) []string { - intent, err := request.GetIntent() - if err != nil || intent != api.EvacuateIntent { - return hosts - } +func shuffleTopHosts(hosts []string, k int) []string { if k <= 0 { k = 3 } @@ -281,7 +277,10 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req // This is a hack to address the problem that Nova only uses the first host in hosts for evacuation requests. // Only for evacuation we shuffle the first k hosts to ensure that we do not get stuck on a single host - hosts = shuffleTopHostsForEvacuation(requestData, hosts, httpAPI.config.EvacuationShuffleK) + intent, err := requestData.GetIntent() + if err == nil && intent == api.EvacuateIntent { + hosts = shuffleTopHosts(hosts, httpAPI.config.EvacuationShuffleK) + } response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") if err = json.NewEncoder(w).Encode(response); err != nil { diff --git a/internal/scheduling/nova/external_scheduler_api_test.go b/internal/scheduling/nova/external_scheduler_api_test.go index bcf4e0c6b..955b42e1e 100644 --- a/internal/scheduling/nova/external_scheduler_api_test.go +++ b/internal/scheduling/nova/external_scheduler_api_test.go @@ -511,73 +511,44 @@ func TestLimitHostsToRequest(t *testing.T) { } } -func TestShuffleTopHostsForEvacuation(t *testing.T) { - evacuateRequest := novaapi.ExternalSchedulerRequest{ - Spec: novaapi.NovaObject[novaapi.NovaSpec]{ - Data: novaapi.NovaSpec{ - SchedulerHints: map[string]any{"_nova_check_type": "evacuate"}, - }, - }, - } - createRequest := novaapi.ExternalSchedulerRequest{ - Spec: novaapi.NovaObject[novaapi.NovaSpec]{ - Data: novaapi.NovaSpec{ - SchedulerHints: map[string]any{"_nova_check_type": "create"}, - }, - }, - } - +func TestShuffleTopHosts(t *testing.T) { tests := []struct { name string - request novaapi.ExternalSchedulerRequest hosts []string k int unchangedTailFrom int // index from which hosts should be unchanged (-1 if all can change) }{ { - name: "non-evacuation request returns hosts unchanged", - request: createRequest, - hosts: []string{"host1", "host2", "host3"}, - k: 3, - unchangedTailFrom: 0, - }, - { - name: "evacuation with empty hosts returns empty", - request: evacuateRequest, - hosts: []string{}, - k: 3, + name: "empty hosts returns empty", + hosts: []string{}, + k: 3, }, { - name: "evacuation with single host returns unchanged", - request: evacuateRequest, + name: "single host returns unchanged", hosts: []string{"host1"}, k: 3, unchangedTailFrom: 0, }, { - name: "evacuation shuffles only first k hosts", - request: evacuateRequest, + name: "shuffles only first k hosts", hosts: []string{"host1", "host2", "host3", "host4", "host5"}, k: 3, unchangedTailFrom: 3, }, { - name: "evacuation with k=0 uses default of 3", - request: evacuateRequest, + name: "k=0 uses default of 3", hosts: []string{"host1", "host2", "host3", "host4", "host5"}, k: 0, unchangedTailFrom: 3, }, { - name: "evacuation with negative k uses default of 3", - request: evacuateRequest, + name: "negative k uses default of 3", hosts: []string{"host1", "host2", "host3", "host4", "host5"}, k: -1, unchangedTailFrom: 3, }, { - name: "evacuation with k larger than hosts shuffles all", - request: evacuateRequest, + name: "k larger than hosts shuffles all", hosts: []string{"host1", "host2"}, k: 10, unchangedTailFrom: -1, @@ -589,7 +560,7 @@ func TestShuffleTopHostsForEvacuation(t *testing.T) { original := make([]string, len(tt.hosts)) copy(original, tt.hosts) - result := shuffleTopHostsForEvacuation(tt.request, tt.hosts, tt.k) + result := shuffleTopHosts(tt.hosts, tt.k) if len(result) != len(tt.hosts) { t.Fatalf("expected %d hosts, got %d", len(tt.hosts), len(result)) From eae5e8fd49a2ace86e36c26ac0f9f5c58095ffb7 Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Wed, 25 Mar 2026 14:43:08 +0100 Subject: [PATCH 3/5] cleanup from merge --- internal/scheduling/nova/external_scheduler_api.go | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/internal/scheduling/nova/external_scheduler_api.go b/internal/scheduling/nova/external_scheduler_api.go index 9328873d7..72ff15a1e 100644 --- a/internal/scheduling/nova/external_scheduler_api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -27,10 +27,11 @@ import ( // Custom configuration for the Nova external scheduler api. type HTTPAPIConfig struct { - // OpenStack projects that use experimental features. - ExperimentalProjectIDs []string `json:"experimentalProjectIDs,omitempty"` // Number of top hosts to shuffle for evacuation requests. Defaults to 3. EvacuationShuffleK int `json:"evacuationShuffleK,omitempty"` + // NovaLimitHostsToRequest, if true, will filter the Nova scheduler response + // to only include hosts that were in the original request. + NovaLimitHostsToRequest bool `json:"novaLimitHostsToRequest,omitempty"` } type HTTPAPIDelegate interface { @@ -43,12 +44,6 @@ type HTTPAPI interface { Init(*http.ServeMux) } -type HTTPAPIConfig struct { - // NovaLimitHostsToRequest, if true, will filter the Nova scheduler response - // to only include hosts that were in the original request. - NovaLimitHostsToRequest bool `json:"novaLimitHostsToRequest,omitempty"` -} - type httpAPI struct { monitor scheduling.APIMonitor delegate HTTPAPIDelegate From 92d169ad0105098cae7de64502584471f32bd092 Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Thu, 26 Mar 2026 08:50:11 +0100 Subject: [PATCH 4/5] polish --- cmd/main.go | 3 +++ .../scheduling/nova/external_scheduler_api.go | 5 ++-- .../nova/external_scheduler_api_test.go | 24 ++++++++++++------- internal/scheduling/nova/integration_test.go | 4 ++-- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 5d8acf1e7..be202f604 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -329,6 +329,9 @@ func main() { os.Exit(1) } novaAPIConfig := conf.GetConfigOrDie[nova.HTTPAPIConfig]() + setupLog.Info("loaded nova API config", + "evacuationShuffleK", novaAPIConfig.EvacuationShuffleK, + "novaLimitHostsToRequest", novaAPIConfig.NovaLimitHostsToRequest) nova.NewAPI(novaAPIConfig, filterWeigherController).Init(mux) // Detector pipeline controller setup. diff --git a/internal/scheduling/nova/external_scheduler_api.go b/internal/scheduling/nova/external_scheduler_api.go index 72ff15a1e..d731f8ffa 100644 --- a/internal/scheduling/nova/external_scheduler_api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -27,7 +27,8 @@ import ( // Custom configuration for the Nova external scheduler api. type HTTPAPIConfig struct { - // Number of top hosts to shuffle for evacuation requests. Defaults to 3. + // Number of top hosts to shuffle for evacuation requests. + // Set to 0 or negative to disable shuffling. EvacuationShuffleK int `json:"evacuationShuffleK,omitempty"` // NovaLimitHostsToRequest, if true, will filter the Nova scheduler response // to only include hosts that were in the original request. @@ -125,7 +126,7 @@ func (httpAPI *httpAPI) inferPipelineName(requestData api.ExternalSchedulerReque // rather than concentrating them on the single "best" host. func shuffleTopHosts(hosts []string, k int) []string { if k <= 0 { - k = 3 + return hosts } n := min(k, len(hosts)) if n <= 1 { diff --git a/internal/scheduling/nova/external_scheduler_api_test.go b/internal/scheduling/nova/external_scheduler_api_test.go index 42004cd1b..8bb72e05a 100644 --- a/internal/scheduling/nova/external_scheduler_api_test.go +++ b/internal/scheduling/nova/external_scheduler_api_test.go @@ -512,6 +512,7 @@ func TestShuffleTopHosts(t *testing.T) { hosts []string k int unchangedTailFrom int // index from which hosts should be unchanged (-1 if all can change) + expectUnchanged bool }{ { name: "empty hosts returns empty", @@ -531,16 +532,16 @@ func TestShuffleTopHosts(t *testing.T) { unchangedTailFrom: 3, }, { - name: "k=0 uses default of 3", - hosts: []string{"host1", "host2", "host3", "host4", "host5"}, - k: 0, - unchangedTailFrom: 3, + name: "k=0 disables shuffling", + hosts: []string{"host1", "host2", "host3", "host4", "host5"}, + k: 0, + expectUnchanged: true, }, { - name: "negative k uses default of 3", - hosts: []string{"host1", "host2", "host3", "host4", "host5"}, - k: -1, - unchangedTailFrom: 3, + name: "negative k disables shuffling", + hosts: []string{"host1", "host2", "host3", "host4", "host5"}, + k: -1, + expectUnchanged: true, }, { name: "k larger than hosts shuffles all", @@ -566,6 +567,13 @@ func TestShuffleTopHosts(t *testing.T) { t.Errorf("original slice modified at %d: expected %s, got %s", i, h, tt.hosts[i]) } } + // When k <= 0, expect the same slice returned (no copy) + if tt.expectUnchanged { + if len(result) > 0 && &result[0] != &tt.hosts[0] { + t.Error("expected same slice returned when k <= 0") + } + return + } // Verify tail unchanged if tt.unchangedTailFrom >= 0 { for i := tt.unchangedTailFrom; i < len(original); i++ { diff --git a/internal/scheduling/nova/integration_test.go b/internal/scheduling/nova/integration_test.go index 5f5cd2562..f4bb38a79 100644 --- a/internal/scheduling/nova/integration_test.go +++ b/internal/scheduling/nova/integration_test.go @@ -283,9 +283,9 @@ func NewIntegrationTestServer(t *testing.T, pipelineConfig PipelineConfig, objec controller.PipelineConfigs[testPipeline.Name] = testPipeline // Create the HTTP API with the controller as delegate - skip metrics registration - // Set EvacuationShuffleK=1 to disable shuffle for deterministic test results + // Set EvacuationShuffleK=0 to disable shuffle for deterministic test results api := &httpAPI{ - config: HTTPAPIConfig{EvacuationShuffleK: 1}, + config: HTTPAPIConfig{EvacuationShuffleK: 0}, monitor: lib.NewSchedulerMonitor(), // Create new monitor but don't register delegate: controller, } From 85d65f563d857d0c17d7462f16940400dbb86a4d Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Thu, 26 Mar 2026 10:01:52 +0100 Subject: [PATCH 5/5] enable --- helm/bundles/cortex-nova/values.yaml | 3 +++ .../nova/external_scheduler_api_test.go | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index 192d714c2..b466e519c 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -141,6 +141,9 @@ cortex-scheduling-controllers: # If true, the external scheduler API will limit the list of hosts in its # response to those included in the scheduling request. novaLimitHostsToRequest: true + # Number of top hosts to shuffle for evacuation requests. + # Set to 0 or negative to disable shuffling. + evacuationShuffleK: 3 # CommittedResourceFlavorGroupPipelines maps flavor group IDs to pipeline names for CR reservations # This allows different scheduling strategies per flavor group (e.g., HANA vs GP) committedResourceFlavorGroupPipelines: diff --git a/internal/scheduling/nova/external_scheduler_api_test.go b/internal/scheduling/nova/external_scheduler_api_test.go index 8bb72e05a..70ce3c334 100644 --- a/internal/scheduling/nova/external_scheduler_api_test.go +++ b/internal/scheduling/nova/external_scheduler_api_test.go @@ -525,6 +525,24 @@ func TestShuffleTopHosts(t *testing.T) { k: 3, unchangedTailFrom: 0, }, + { + name: "two hosts with k=3 shuffles all", + hosts: []string{"host1", "host2"}, + k: 3, + unchangedTailFrom: -1, + }, + { + name: "three hosts with k=3 shuffles all", + hosts: []string{"host1", "host2", "host3"}, + k: 3, + unchangedTailFrom: -1, + }, + { + name: "four hosts with k=3 shuffles first 3", + hosts: []string{"host1", "host2", "host3", "host4"}, + k: 3, + unchangedTailFrom: 3, + }, { name: "shuffles only first k hosts", hosts: []string{"host1", "host2", "host3", "host4", "host5"},