Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion helm/bundles/cortex-nova/templates/pipelines_kvm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,13 @@ spec:
into the smallest gaps possible, it spreads the load to ensure
workloads are balanced across hosts. In this pipeline, the balancing will
focus on general purpose virtual machines.
- name: kvm_failover_evacuation
description: |
This weigher prefers hosts with active failover reservations during
evacuation requests. Hosts matching a failover reservation where the
VM is allocated get a higher weight, encouraging placement on
pre-reserved failover capacity. For non-evacuation requests, this
weigher has no effect.
---
apiVersion: cortex.cloud/v1alpha1
kind: Pipeline
Expand Down Expand Up @@ -248,6 +255,13 @@ spec:
It pulls the requested vm into the smallest gaps possible, to ensure
other hosts with less allocation stay free for bigger vms.
In this pipeline, the binpacking will focus on hana virtual machines.
- name: kvm_failover_evacuation
description: |
This weigher prefers hosts with active failover reservations during
evacuation requests. Hosts matching a failover reservation where the
VM is allocated get a higher weight, encouraging placement on
pre-reserved failover capacity. For non-evacuation requests, this
weigher has no effect.
---
apiVersion: cortex.cloud/v1alpha1
kind: Pipeline
Expand Down Expand Up @@ -523,5 +537,12 @@ spec:
This step will filter out hosts that do not meet the compute capabilities
requested by the nova flavor extra specs, like `{"arch": "x86_64",
"maxphysaddr:bits": 46, ...}`.
weighers: []
weighers:
- name: kvm_failover_evacuation
description: |
This weigher prefers hosts with active failover reservations during
evacuation requests. Hosts matching a failover reservation where the
VM is allocated get a higher weight, encouraging placement on
pre-reserved failover capacity. For non-evacuation requests, this
weigher has no effect.
{{- end }}
6 changes: 3 additions & 3 deletions helm/bundles/cortex-nova/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,13 @@ cortex-scheduling-controllers:
# Maps flavor name patterns (glob) to required failover count
# Example: {"hana_*": 2, "m1.xlarge": 1}
flavorFailoverRequirements:
"*": 1
"*": 2
# How often to check for missing failover reservations (periodic bulk reconciliation)
reconcileInterval: 15m
reconcileInterval: 5m
# Used when maxVMsToProcess limits processing, allows faster catch-up and for the first reconcile
shortReconcileInterval: 1m
# Number of max VMs to process in one periodic reconciliation loop
maxVMsToProcess: 5
maxVMsToProcess: 25
# Minimum successful reservations to use short interval
minSuccessForShortInterval: 1
# Maximum failures allowed to still use short interval
Expand Down
127 changes: 108 additions & 19 deletions tools/visualize-reservations/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
// --hide=view1,view2,... Comma-separated list of views to hide (applied after --views)
// --filter-name=pattern Filter hypervisors by name (substring match)
// --filter-trait=trait Filter hypervisors by trait (e.g., CUSTOM_HANA_EXCLUSIVE_HOST)
// --hypervisor-context=name Kubernetes context for reading Hypervisors (default: current context)
// --reservation-context=name Kubernetes context for reading Reservations (default: current context)
// --postgres-context=name Kubernetes context for reading postgres secret (default: current context)
//
// To connect to postgres when running locally, use kubectl port-forward:
//
Expand All @@ -45,6 +48,8 @@ import (
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"
)
Expand Down Expand Up @@ -177,6 +182,38 @@ func applyHideViews(views viewSet, hideFlag string) {
}
}

// getClientForContext creates a kubernetes client for the specified context.
// If contextName is empty, it uses the current/default context.
func getClientForContext(contextName string) (client.Client, error) {
	var restCfg *rest.Config

	if contextName == "" {
		// No explicit context requested: rely on the default kubeconfig
		// resolution (in-cluster config or the current kubeconfig context).
		defaultCfg, err := config.GetConfig()
		if err != nil {
			return nil, fmt.Errorf("getting default kubeconfig: %w", err)
		}
		restCfg = defaultCfg
	} else {
		// Load the standard kubeconfig chain but override the current
		// context with the one the caller asked for.
		overrides := &clientcmd.ConfigOverrides{
			CurrentContext: contextName,
		}
		loader := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
			clientcmd.NewDefaultClientConfigLoadingRules(), overrides)
		ctxCfg, err := loader.ClientConfig()
		if err != nil {
			return nil, fmt.Errorf("getting kubeconfig for context %q: %w", contextName, err)
		}
		restCfg = ctxCfg
	}

	// Build a controller-runtime client with the package-level scheme so the
	// custom resource types (Hypervisor, Reservation) are registered.
	c, err := client.New(restCfg, client.Options{Scheme: scheme})
	if err != nil {
		return nil, fmt.Errorf("creating client: %w", err)
	}
	return c, nil
}

func main() {
// Parse command line flags
sortBy := flag.String("sort", "vm", "Sort VMs by: vm (UUID), vm-host (VM's host), res-host (reservation host)")
Expand All @@ -188,24 +225,50 @@ func main() {
hideFlag := flag.String("hide", "", "Comma-separated list of views to hide (applied after --views)")
filterName := flag.String("filter-name", "", "Filter hypervisors by name (substring match)")
filterTrait := flag.String("filter-trait", "", "Filter hypervisors by trait (e.g., CUSTOM_HANA_EXCLUSIVE_HOST)")
hypervisorContext := flag.String("hypervisor-context", "", "Kubernetes context for reading Hypervisors (default: current context)")
reservationContext := flag.String("reservation-context", "", "Kubernetes context for reading Reservations (default: current context)")
postgresContext := flag.String("postgres-context", "", "Kubernetes context for reading postgres secret (default: current context)")
flag.Parse()

views := parseViews(*viewsFlag)
applyHideViews(views, *hideFlag)

ctx := context.Background()

// Create kubernetes client
cfg, err := config.GetConfig()
// Create kubernetes clients for hypervisors and reservations
// They may use different contexts if specified
hvClient, err := getClientForContext(*hypervisorContext)
if err != nil {
fmt.Fprintf(os.Stderr, "Error getting kubeconfig: %v\n", err)
fmt.Fprintf(os.Stderr, "Error creating hypervisor client: %v\n", err)
os.Exit(1)
}

k8sClient, err := client.New(cfg, client.Options{Scheme: scheme})
if err != nil {
fmt.Fprintf(os.Stderr, "Error creating client: %v\n", err)
os.Exit(1)
// Reuse the same client if contexts are the same, otherwise create a new one
var resClient client.Client
if *reservationContext == *hypervisorContext {
resClient = hvClient
} else {
resClient, err = getClientForContext(*reservationContext)
if err != nil {
fmt.Fprintf(os.Stderr, "Error creating reservation client: %v\n", err)
os.Exit(1)
}
}

// Create postgres client (for reading the secret)
// This is typically the local cluster where cortex runs
var pgClient client.Client
switch *postgresContext {
case *hypervisorContext:
pgClient = hvClient
case *reservationContext:
pgClient = resClient
default:
pgClient, err = getClientForContext(*postgresContext)
if err != nil {
fmt.Fprintf(os.Stderr, "Error creating postgres client: %v\n", err)
os.Exit(1)
}
}

// Determine namespace
Expand All @@ -214,19 +277,19 @@ func main() {
ns = "default" // Default fallback
}

// Try to connect to postgres
// Try to connect to postgres (use pgClient for reading the secret)
var db *sql.DB
var serverMap map[string]serverInfo
var flavorMap map[string]flavorInfo

db, serverMap, flavorMap = connectToPostgres(ctx, k8sClient, *postgresSecret, ns, *postgresHostOverride, *postgresPortOverride)
db, serverMap, flavorMap = connectToPostgres(ctx, pgClient, *postgresSecret, ns, *postgresHostOverride, *postgresPortOverride, *postgresContext)
if db != nil {
defer db.Close()
}

// Get all hypervisors to find all VMs
// Get all hypervisors to find all VMs (use hvClient)
var allHypervisors hv1.HypervisorList
if err := k8sClient.List(ctx, &allHypervisors); err != nil {
if err := hvClient.List(ctx, &allHypervisors); err != nil {
fmt.Fprintf(os.Stderr, "Error listing hypervisors: %v\n", err)
Comment on lines +292 to 293
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Thread the selected context through all failure output.

The new summary/secret-read messages are context-aware, but the early List failures and the Postgres ping recovery hint still are not. In multi-context mode that leaves the failing cluster ambiguous, and the port-forward/rerun instructions can point people at the wrong cluster when --postgres-context is set.

🛠️ Suggested fix
+	contextLabel := func(name string) string {
+		if name == "" {
+			return "(current context)"
+		}
+		return name
+	}
+
 	// Get all hypervisors to find all VMs (use hvClient)
 	var allHypervisors hv1.HypervisorList
 	if err := hvClient.List(ctx, &allHypervisors); err != nil {
-		fmt.Fprintf(os.Stderr, "Error listing hypervisors: %v\n", err)
+		fmt.Fprintf(os.Stderr, "Error listing hypervisors from %s: %v\n", contextLabel(*hypervisorContext), err)
 		return
 	}
@@
 	// Get all reservations (both failover and committed) (use resClient)
 	var allReservations v1alpha1.ReservationList
 	if err := resClient.List(ctx, &allReservations); err != nil {
-		fmt.Fprintf(os.Stderr, "Error listing reservations: %v\n", err)
+		fmt.Fprintf(os.Stderr, "Error listing reservations from %s: %v\n", contextLabel(*reservationContext), err)
 		return
 	}
 	if err := db.PingContext(ctx); err != nil {
 		fmt.Fprintf(os.Stderr, "Warning: Could not ping postgres at %s:%s: %v\n", host, port, err)
 		fmt.Fprintf(os.Stderr, "         If running locally, use kubectl port-forward:\n")
-		fmt.Fprintf(os.Stderr, "           kubectl port-forward svc/%s %s:%s -n %s\n", host, port, port, namespace)
-		fmt.Fprintf(os.Stderr, "           ./visualize-reservations --postgres-host=localhost --postgres-port=%s\n\n", port)
+		if contextName != "" {
+			fmt.Fprintf(os.Stderr, "           kubectl --context=%s port-forward svc/%s %s:%s -n %s\n", contextName, host, port, port, namespace)
+			fmt.Fprintf(os.Stderr, "           ./visualize-reservations --postgres-context=%s --postgres-host=localhost --postgres-port=%s\n\n", contextName, port)
+		} else {
+			fmt.Fprintf(os.Stderr, "           kubectl port-forward svc/%s %s:%s -n %s\n", host, port, port, namespace)
+			fmt.Fprintf(os.Stderr, "           ./visualize-reservations --postgres-host=localhost --postgres-port=%s\n\n", port)
+		}
 		db.Close()
 		return nil, nil, nil
 	}

Also applies to: 338-339, 1349-1365, 1416-1420

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tools/visualize-reservations/main.go` around lines 292 - 293, The error
output for hvClient.List (and similar early failures) must include the active
cluster/context so failures aren’t ambiguous in multi-context mode; update the
fmt.Fprintf(os.Stderr, ...) calls around hvClient.List and the Postgres ping
recovery hint to append the selected context name (the value passed via
--postgres-context or the resolved context variable used to build ctx) or use a
context-aware logger that injects that context; search for hvClient.List,
fmt.Fprintf(os.Stderr, ...), and the Postgres ping recovery hint blocks (also at
the other mentioned locations) and modify their error messages to include the
context identifier variable (e.g., postgresContext/selectedContext) so
port-forward/rerun instructions and error lines clearly reference the correct
cluster.

return
}
Expand Down Expand Up @@ -270,9 +333,9 @@ func main() {
}
}

// Get all reservations (both failover and committed)
// Get all reservations (both failover and committed) (use resClient)
var allReservations v1alpha1.ReservationList
if err := k8sClient.List(ctx, &allReservations); err != nil {
if err := resClient.List(ctx, &allReservations); err != nil {
fmt.Fprintf(os.Stderr, "Error listing reservations: %v\n", err)
return
}
Expand Down Expand Up @@ -946,6 +1009,24 @@ func main() {
if views.has(viewSummary) {
printHeader("Summary Statistics")

// Kubernetes context information
hvCtx := *hypervisorContext
if hvCtx == "" {
hvCtx = "(current context)"
}
resCtx := *reservationContext
if resCtx == "" {
resCtx = "(current context)"
}
pgCtx := *postgresContext
if pgCtx == "" {
pgCtx = "(current context)"
}
fmt.Printf("Hypervisor context: %s\n", hvCtx)
fmt.Printf("Reservation context: %s\n", resCtx)
fmt.Printf("Postgres context: %s\n", pgCtx)
fmt.Println()

// Database connection status
if db != nil {
fmt.Printf("Database: ✅ connected (servers: %d, flavors: %d)\n", len(serverMap), len(flavorMap))
Expand Down Expand Up @@ -1265,16 +1346,22 @@ func printHypervisorSummary(hypervisors []hv1.Hypervisor, reservations []v1alpha
fmt.Println()
}

func connectToPostgres(ctx context.Context, k8sClient client.Client, secretName, namespace, hostOverride, portOverride string) (db *sql.DB, serverMap map[string]serverInfo, flavorMap map[string]flavorInfo) {
func connectToPostgres(ctx context.Context, k8sClient client.Client, secretName, namespace, hostOverride, portOverride, contextName string) (db *sql.DB, serverMap map[string]serverInfo, flavorMap map[string]flavorInfo) {
ctxDisplay := contextName
if ctxDisplay == "" {
ctxDisplay = "(current context)"
}
fmt.Fprintf(os.Stderr, "Postgres: Reading secret '%s' from namespace '%s' using context '%s'\n", secretName, namespace, ctxDisplay)

// Get the postgres secret
secret := &corev1.Secret{}
if err := k8sClient.Get(ctx, client.ObjectKey{
Namespace: namespace,
Name: secretName,
}, secret); err != nil {
fmt.Fprintf(os.Stderr, "Warning: Could not get postgres secret '%s' in namespace '%s': %v\n", secretName, namespace, err)
fmt.Fprintf(os.Stderr, "Warning: Could not get postgres secret '%s' in namespace '%s' (context: %s): %v\n", secretName, namespace, ctxDisplay, err)
fmt.Fprintf(os.Stderr, " Postgres features will be disabled.\n")
fmt.Fprintf(os.Stderr, " Use --postgres-secret and --namespace flags to specify the secret.\n\n")
fmt.Fprintf(os.Stderr, " Use --postgres-secret, --namespace, and --postgres-context flags to specify the secret location.\n\n")
return nil, nil, nil
}

Expand Down Expand Up @@ -1641,7 +1728,11 @@ func printAllServers(serverMap map[string]serverInfo, _ map[string]flavorInfo, a
}

// Check if VM is in postgres
if server, ok := serverMap[uuid]; ok {
server, inPostgres := serverMap[uuid]
switch {
case !inPostgres:
info.Status = "NOT_IN_PG"
default:
info.InPostgres = true
info.PGHost = server.OSEXTSRVATTRHost
if info.FlavorName == "" {
Expand All @@ -1658,8 +1749,6 @@ func printAllServers(serverMap map[string]serverInfo, _ map[string]flavorInfo, a
default:
info.Status = "WRONG_HOST"
}
} else {
info.Status = "NOT_IN_PG"
}

vms = append(vms, info)
Expand Down
Loading