diff --git a/chart/values.yaml b/chart/values.yaml
index a859bd76..2527f6a4 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -86,14 +86,15 @@ admission:
 # GPU Preemption Configuration
 # Enables utilization-based preemption of idle GPU workloads
 gpuPreemption:
-  enabled: false
-  metricsEndpoint: "" # AMD GPU metrics exporter URL (required when enabled)
-  pollingInterval: "15s" # How often the scraper polls the metrics endpoint
-  defaultThreshold: "5" # Default utilization threshold (%) below which a workload is idle
-  defaultGracePeriod: "10m" # Default idle duration before preemption eligibility
-  defaultPolicy: "OnPressure" # Default preemption policy (OnPressure or Always)
-  defaultAggregation: "Max" # Default multi-pod aggregation (Min, Max, or Avg)
-  defaultTTL: "24h" # Default TTL for terminal GpuWorkload CRs ("0" = retain forever)
+  enabled: true  # NOTE(review): now on by default (was false) - confirm intended for every install
+  # AMD GPU metrics exporter URL (required when enabled); replace with your actual in-cluster URL.
+  metricsEndpoint: "http://lgtm-stack.otel-lgtm-stack.svc.cluster.local:9090/federate?match[]=gpu_gfx_activity"
+  pollingInterval: "5s"  # How often the scraper polls the metrics endpoint
+  defaultThreshold: "5"  # Default utilization threshold (%) below which a workload is idle
+  defaultGracePeriod: "10m"  # Default idle duration before preemption eligibility
+  defaultPolicy: "OnPressure"  # Default preemption policy (OnPressure or Always)
+  defaultAggregation: "Max"  # Default multi-pod aggregation (Min, Max, or Avg)
+  defaultTTL: "24h"  # Default TTL for terminal GpuWorkload CRs ("0" = retain forever)
 
 # Optional KaiwoConfig - global operator configuration
 # If provided, a KaiwoConfig resource will be created