splunk · kbhos-splunk · Jun 8, 2026
diff --git a/config/configs/applications.yaml b/config/configs/applications.yaml
@@ -55,6 +55,14 @@ applications:
               max_ongoing_requests: 6
               ray_actor_options:
                 num_gpus: 2
+            RTX_PRO_6000_BLACKWELL:
+              autoscaling_config:
+                max_replicas: {{.Replicas.Gemma431bIt}}
+                min_replicas: {{.Replicas.Gemma431bIt}}
+                target_ongoing_requests: 4
+              max_ongoing_requests: 10
+              ray_actor_options:
+                num_gpus: 2
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.Gemma431bIt}}
@@ -78,6 +86,14 @@ applications:
               max_num_batched_tokens: 4096
               max_num_seqs: 2
               tensor_parallel_size: 2
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              dtype: bfloat16
+              gpu_memory_utilization: 0.85
+              max_model_len: 240000
+              max_num_batched_tokens: 4096
+              max_num_seqs: 1
+              tensor_parallel_size: 2
         model_config:
           openai_serving_config:
             chat:
@@ -162,6 +178,9 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 1
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 1
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.GptOss20b}}
@@ -178,6 +197,10 @@ applications:
             engine_args:
               gpu_memory_utilization: 0.95
               tensor_parallel_size: 1
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.50
+              tensor_parallel_size: 1
         model_config:
           openai_serving_config:
             chat:
@@ -251,6 +274,9 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 0.075
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.031
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.UaeLarge}}
@@ -266,6 +292,9 @@ applications:
           L40S:
             engine_args:
               gpu_memory_utilization: 0.075
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.031
         model_config:
           engine_args:
             gpu_memory_utilization: 0.15
@@ -317,6 +346,9 @@ applications:
             H100:
               ray_actor_options:
                 num_gpus: 0.005
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.004
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.AllMinilmL6V2}}
@@ -329,6 +361,9 @@ applications:
           H100:
             engine_args:
               gpu_memory_utilization: 0.005
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.004
         model_config:
           engine_args:
             gpu_memory_utilization: 0.01
@@ -500,6 +535,9 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 0.05
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.021
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.XlmRobertaLanguageClassifier}}
@@ -515,6 +553,9 @@ applications:
           L40S:
             engine_args:
               gpu_memory_utilization: 0.05
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.021
         model_config:
           engine_args:
             gpu_memory_utilization: 0.1
@@ -605,6 +646,9 @@ applications:
             H100:
               ray_actor_options:
                 num_gpus: 0.005
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.004
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.CrossEncoder}}
@@ -617,6 +661,9 @@ applications:
           H100:
             engine_args:
               gpu_memory_utilization: 0.005
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.004
         model_config:
           engine_args:
             gpu_memory_utilization: 0.01
@@ -672,6 +719,9 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 0.05
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.021
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.E5LanguageClassifier}}
@@ -687,6 +737,9 @@ applications:
           L40S:
             engine_args:
               gpu_memory_utilization: 0.05
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.021
         model_config:
           engine_args:
             gpu_memory_utilization: 0.1
@@ -741,6 +794,9 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 0.025
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.013
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.PromptInjectionCrossEncoder}}

diff --git a/config/configs/instance.yaml b/config/configs/instance.yaml
@@ -85,4 +85,39 @@ H100_NVL:
         cpu: "16"
         memory: "48Gi"
         ephemeral-storage: "100Gi"
-        nvidia.com/gpu: "1"
+        nvidia.com/gpu: "1"
+# Keep the top-level key below in sync with the gpu_type_options_override keys in
+# applications.yaml and with spec.defaultAcceleratorType — builder.go requires an exact match.
+RTX_PRO_6000_BLACKWELL:
+  - tier: rtx-pro-6000-blackwell-0-gpu  # CPU-only tier: gpusPerPod and the GPU limit are both 0
+    gpusPerPod: 0
+    env:
+      NVIDIA_VISIBLE_DEVICES: void  # presumably keeps host GPUs hidden from this pod — verify
+    resources:
+      requests:
+        cpu: "4"
+      limits:
+        cpu: "16"
+        memory: "24Gi"
+        ephemeral-storage: "50Gi"
+        nvidia.com/gpu: "0"
+  - tier: rtx-pro-6000-blackwell-1-gpu
+    gpusPerPod: 1
+    resources:
+      requests:
+        cpu: "4"
+      limits:
+        cpu: "16"
+        memory: "48Gi"
+        ephemeral-storage: "200Gi"
+        nvidia.com/gpu: "1"
+  - tier: rtx-pro-6000-blackwell-2-gpu
+    gpusPerPod: 2
+    resources:
+      requests:
+        cpu: "1"  # NOTE(review): lower than the 1-GPU tier's "4" — confirm intentional
+      limits:
+        cpu: "8"  # NOTE(review): lower than the 1-GPU tier's "16" — confirm intentional
+        memory: "96Gi"
+        ephemeral-storage: "400Gi"
+        nvidia.com/gpu: "2"
diff --git a/tools/cluster_setup/k0s-cluster-config.yaml b/tools/cluster_setup/k0s-cluster-config.yaml
@@ -153,6 +153,9 @@ aiPlatform:
   name: "splunk-ai-stack"
   defaultAcceleratorType: "L40S"
   # defaultAcceleratorType: "H100"
+  # defaultAcceleratorType: "RTX_PRO_6000_BLACKWELL"   # RTX PRO 6000 Blackwell (g7e-class) nodes.
+  #   Must exactly match a top-level key in instance.yaml (not a `tier:` value) AND the
+  #   gpu_type_options_override keys in applications.yaml — builder.go errors if no matching worker tier exists.
 
   workerGroupConfig:
     imageRegistry: ""