diff --git a/config/configs/applications.yaml b/config/configs/applications.yaml index a9a0869f..4d91409d 100644 --- a/config/configs/applications.yaml +++ b/config/configs/applications.yaml @@ -55,6 +55,14 @@ applications: max_ongoing_requests: 6 ray_actor_options: num_gpus: 2 + RTX_PRO_6000_BLACKWELL: + autoscaling_config: + max_replicas: {{.Replicas.Gemma431bIt}} + min_replicas: {{.Replicas.Gemma431bIt}} + target_ongoing_requests: 4 + max_ongoing_requests: 10 + ray_actor_options: + num_gpus: 2 options: autoscaling_config: max_replicas: {{.Replicas.Gemma431bIt}} @@ -78,6 +86,14 @@ applications: max_num_batched_tokens: 4096 max_num_seqs: 2 tensor_parallel_size: 2 + RTX_PRO_6000_BLACKWELL: + engine_args: + dtype: bfloat16 + gpu_memory_utilization: 0.85 + max_model_len: 240000 + max_num_batched_tokens: 4096 + max_num_seqs: 1 + tensor_parallel_size: 2 model_config: openai_serving_config: chat: @@ -162,6 +178,9 @@ applications: L40S: ray_actor_options: num_gpus: 1 + RTX_PRO_6000_BLACKWELL: + ray_actor_options: + num_gpus: 1 options: autoscaling_config: max_replicas: {{.Replicas.GptOss20b}} @@ -178,6 +197,10 @@ applications: engine_args: gpu_memory_utilization: 0.95 tensor_parallel_size: 1 + RTX_PRO_6000_BLACKWELL: + engine_args: + gpu_memory_utilization: 0.50 + tensor_parallel_size: 1 model_config: openai_serving_config: chat: @@ -251,6 +274,9 @@ applications: L40S: ray_actor_options: num_gpus: 0.075 + RTX_PRO_6000_BLACKWELL: + ray_actor_options: + num_gpus: 0.031 options: autoscaling_config: max_replicas: {{.Replicas.UaeLarge}} @@ -266,6 +292,9 @@ applications: L40S: engine_args: gpu_memory_utilization: 0.075 + RTX_PRO_6000_BLACKWELL: + engine_args: + gpu_memory_utilization: 0.031 model_config: engine_args: gpu_memory_utilization: 0.15 @@ -317,6 +346,9 @@ applications: H100: ray_actor_options: num_gpus: 0.005 + RTX_PRO_6000_BLACKWELL: + ray_actor_options: + num_gpus: 0.004 options: autoscaling_config: max_replicas: {{.Replicas.AllMinilmL6V2}} @@ -329,6 +361,9 @@ 
applications: H100: engine_args: gpu_memory_utilization: 0.005 + RTX_PRO_6000_BLACKWELL: + engine_args: + gpu_memory_utilization: 0.004 model_config: engine_args: gpu_memory_utilization: 0.01 @@ -500,6 +535,9 @@ applications: L40S: ray_actor_options: num_gpus: 0.05 + RTX_PRO_6000_BLACKWELL: + ray_actor_options: + num_gpus: 0.021 options: autoscaling_config: max_replicas: {{.Replicas.XlmRobertaLanguageClassifier}} @@ -515,6 +553,9 @@ applications: L40S: engine_args: gpu_memory_utilization: 0.05 + RTX_PRO_6000_BLACKWELL: + engine_args: + gpu_memory_utilization: 0.021 model_config: engine_args: gpu_memory_utilization: 0.1 @@ -605,6 +646,9 @@ applications: H100: ray_actor_options: num_gpus: 0.005 + RTX_PRO_6000_BLACKWELL: + ray_actor_options: + num_gpus: 0.004 options: autoscaling_config: max_replicas: {{.Replicas.CrossEncoder}} @@ -617,6 +661,9 @@ applications: H100: engine_args: gpu_memory_utilization: 0.005 + RTX_PRO_6000_BLACKWELL: + engine_args: + gpu_memory_utilization: 0.004 model_config: engine_args: gpu_memory_utilization: 0.01 @@ -672,6 +719,9 @@ applications: L40S: ray_actor_options: num_gpus: 0.05 + RTX_PRO_6000_BLACKWELL: + ray_actor_options: + num_gpus: 0.021 options: autoscaling_config: max_replicas: {{.Replicas.E5LanguageClassifier}} @@ -687,6 +737,9 @@ applications: L40S: engine_args: gpu_memory_utilization: 0.05 + RTX_PRO_6000_BLACKWELL: + engine_args: + gpu_memory_utilization: 0.021 model_config: engine_args: gpu_memory_utilization: 0.1 @@ -741,6 +794,9 @@ applications: L40S: ray_actor_options: num_gpus: 0.025 + RTX_PRO_6000_BLACKWELL: + ray_actor_options: + num_gpus: 0.013 options: autoscaling_config: max_replicas: {{.Replicas.PromptInjectionCrossEncoder}} diff --git a/config/configs/instance.yaml b/config/configs/instance.yaml index 71ea8e78..4705593a 100644 --- a/config/configs/instance.yaml +++ b/config/configs/instance.yaml @@ -85,4 +85,39 @@ H100_NVL: cpu: "16" memory: "48Gi" ephemeral-storage: "100Gi" - nvidia.com/gpu: "1" \ No newline at end 
of file + nvidia.com/gpu: "1" +# Keep the key name in sync with applications.yaml gpu_type_options_override keys +# and spec.defaultAcceleratorType — builder.go requires an exact match. +RTX_PRO_6000_BLACKWELL: + - tier: rtx-pro-6000-blackwell-0-gpu + gpusPerPod: 0 + env: + NVIDIA_VISIBLE_DEVICES: void + resources: + limits: + cpu: "16" + memory: "24Gi" + ephemeral-storage: "50Gi" + nvidia.com/gpu: "0" + requests: + cpu: "4" + - tier: rtx-pro-6000-blackwell-1-gpu + gpusPerPod: 1 + resources: + requests: + cpu: "4" + limits: + cpu: "16" + memory: "48Gi" + ephemeral-storage: "200Gi" + nvidia.com/gpu: "1" + - tier: rtx-pro-6000-blackwell-2-gpu + gpusPerPod: 2 + resources: + requests: + cpu: "1" + limits: + cpu: "8" + memory: "96Gi" + ephemeral-storage: "400Gi" + nvidia.com/gpu: "2" \ No newline at end of file diff --git a/tools/cluster_setup/k0s-cluster-config.yaml b/tools/cluster_setup/k0s-cluster-config.yaml index 2bbcfac8..40e45338 100644 --- a/tools/cluster_setup/k0s-cluster-config.yaml +++ b/tools/cluster_setup/k0s-cluster-config.yaml @@ -153,6 +153,9 @@ aiPlatform: name: "splunk-ai-stack" defaultAcceleratorType: "L40S" # defaultAcceleratorType: "H100" + # defaultAcceleratorType: "RTX_PRO_6000_BLACKWELL" # RTX PRO 6000 Blackwell (g7e-class) nodes. + # Must exactly match a top-level instance.yaml key (not a tier name) AND the + # gpu_type_options_override keys in applications.yaml — builder.go errors if no worker tier matches. workerGroupConfig: imageRegistry: ""