Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 36 additions & 6 deletions internal/scheduling/nova/external_scheduler_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"fmt"
"io"
"log/slog"
"math/rand"
"net/http"

api "github.com/cobaltcore-dev/cortex/api/external/nova"
Expand All @@ -24,6 +25,15 @@ import (
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// HTTPAPIConfig holds the custom configuration for the Nova external scheduler api.
type HTTPAPIConfig struct {
// EvacuationShuffleK is the number of top hosts to shuffle for evacuation
// requests. Values <= 0 fall back to the default of 3. A value of 1 leaves
// the host order unchanged, since a single host cannot be reordered.
EvacuationShuffleK int `json:"evacuationShuffleK,omitempty"`
// NovaLimitHostsToRequest, if true, will filter the Nova scheduler response
// to only include hosts that were in the original request.
NovaLimitHostsToRequest bool `json:"novaLimitHostsToRequest,omitempty"`
}

type HTTPAPIDelegate interface {
// Process the decision from the API. Should create and return the updated decision.
ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error
Expand All @@ -34,12 +44,6 @@ type HTTPAPI interface {
Init(*http.ServeMux)
}

// HTTPAPIConfig holds the configuration options of the Nova external
// scheduler HTTP API.
type HTTPAPIConfig struct {
// NovaLimitHostsToRequest, if true, will filter the Nova scheduler response
// to only include hosts that were in the original request.
NovaLimitHostsToRequest bool `json:"novaLimitHostsToRequest,omitempty"`
}

type httpAPI struct {
monitor scheduling.APIMonitor
delegate HTTPAPIDelegate
Expand Down Expand Up @@ -116,6 +120,26 @@ func (httpAPI *httpAPI) inferPipelineName(requestData api.ExternalSchedulerReque
}
}

// shuffleTopHosts returns the given host list with its first k entries placed
// in random order. Entries from index k onward keep their positions.
//
// The function itself does not inspect the request; callers are expected to
// invoke it only for evacuations, where spreading VMs over several of the
// best-ranked hosts is preferable to piling them onto the single top host.
//
// A k that is zero or negative is replaced by the default of 3, and k is
// capped at len(hosts). When at most one host would be shuffled, the input
// slice is returned as-is (not copied). Otherwise the input is left untouched
// and a freshly allocated slice is returned.
func shuffleTopHosts(hosts []string, k int) []string {
	const defaultShuffleCount = 3

	limit := k
	if limit <= 0 {
		limit = defaultShuffleCount
	}
	if limit > len(hosts) {
		limit = len(hosts)
	}
	// Nothing to reorder with zero or one candidate.
	if limit <= 1 {
		return hosts
	}

	// Work on a copy so the caller's slice is never mutated.
	shuffled := make([]string, len(hosts))
	copy(shuffled, hosts)

	swap := func(a, b int) {
		shuffled[a], shuffled[b] = shuffled[b], shuffled[a]
	}
	rand.Shuffle(limit, swap)

	slog.Info("shuffled top hosts for evacuation", "k", limit, "hosts", shuffled[:limit])
	return shuffled
}

// Limit the external scheduler response to the hosts provided in the external
// scheduler request. i.e. don't provide new hosts that weren't in the request,
// since the Nova scheduler won't know how to handle them.
Expand Down Expand Up @@ -235,6 +259,12 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req
slog.Info("limited hosts to request",
"hosts", hosts, "originalHosts", decision.Status.Result.OrderedHosts)
}
// This is a hack to address the problem that Nova only uses the first host in hosts for evacuation requests.
// For evacuation requests only, we shuffle the first k hosts so that we do not get stuck on a single host.
intent, err := requestData.GetIntent()
if err == nil && intent == api.EvacuateIntent {
hosts = shuffleTopHosts(hosts, httpAPI.config.EvacuationShuffleK)
}
response := api.ExternalSchedulerResponse{Hosts: hosts}
w.Header().Set("Content-Type", "application/json")
if err = json.NewEncoder(w).Encode(response); err != nil {
Expand Down
82 changes: 82 additions & 0 deletions internal/scheduling/nova/external_scheduler_api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,88 @@ func TestLimitHostsToRequest(t *testing.T) {
}
}

// TestShuffleTopHosts exercises shuffleTopHosts with a table of inputs and
// checks the invariants that hold regardless of the random outcome: the result
// has the same length as the input, the input slice is not mutated, hosts
// beyond the shuffled prefix keep their positions, and no host is lost.
func TestShuffleTopHosts(t *testing.T) {
	type testCase struct {
		name  string
		hosts []string
		k     int
		// unchangedTailFrom is the first index whose host must keep its
		// position in the result; -1 means every position may change.
		unchangedTailFrom int
	}

	cases := []testCase{
		{name: "empty hosts returns empty", hosts: []string{}, k: 3},
		{name: "single host returns unchanged", hosts: []string{"host1"}, k: 3, unchangedTailFrom: 0},
		{
			name:              "shuffles only first k hosts",
			hosts:             []string{"host1", "host2", "host3", "host4", "host5"},
			k:                 3,
			unchangedTailFrom: 3,
		},
		{
			name:              "k=0 uses default of 3",
			hosts:             []string{"host1", "host2", "host3", "host4", "host5"},
			k:                 0,
			unchangedTailFrom: 3,
		},
		{
			name:              "negative k uses default of 3",
			hosts:             []string{"host1", "host2", "host3", "host4", "host5"},
			k:                 -1,
			unchangedTailFrom: 3,
		},
		{
			name:              "k larger than hosts shuffles all",
			hosts:             []string{"host1", "host2"},
			k:                 10,
			unchangedTailFrom: -1,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Snapshot the input so mutation by the function can be detected.
			before := make([]string, len(tc.hosts))
			copy(before, tc.hosts)

			got := shuffleTopHosts(tc.hosts, tc.k)

			if len(got) != len(tc.hosts) {
				t.Fatalf("expected %d hosts, got %d", len(tc.hosts), len(got))
			}

			// The caller's slice must be left exactly as it was passed in.
			for idx := range before {
				if tc.hosts[idx] != before[idx] {
					t.Errorf("original slice modified at %d: expected %s, got %s", idx, before[idx], tc.hosts[idx])
				}
			}

			// Everything past the shuffled prefix has to stay in place.
			if tc.unchangedTailFrom >= 0 {
				for idx := tc.unchangedTailFrom; idx < len(before); idx++ {
					if got[idx] != before[idx] {
						t.Errorf("expected host[%d] = %s unchanged, got %s", idx, before[idx], got[idx])
					}
				}
			}

			// Every input host must still appear somewhere in the result.
			present := make(map[string]struct{}, len(got))
			for _, host := range got {
				present[host] = struct{}{}
			}
			for _, host := range before {
				if _, ok := present[host]; !ok {
					t.Errorf("host %s missing from result", host)
				}
			}
		})
	}
}

func TestHTTPAPI_inferPipelineName(t *testing.T) {
delegate := &mockHTTPAPIDelegate{}
api := NewAPI(HTTPAPIConfig{}, delegate).(*httpAPI)
Expand Down
2 changes: 2 additions & 0 deletions internal/scheduling/nova/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,9 @@ func NewIntegrationTestServer(t *testing.T, pipelineConfig PipelineConfig, objec
controller.PipelineConfigs[testPipeline.Name] = testPipeline

// Create the HTTP API with the controller as delegate - skip metrics registration
// Set EvacuationShuffleK=1 to disable shuffle for deterministic test results
api := &httpAPI{
config: HTTPAPIConfig{EvacuationShuffleK: 1},
monitor: lib.NewSchedulerMonitor(), // Create new monitor but don't register
delegate: controller,
}
Expand Down
Loading