splunk · kupratyu-splunk · Jun 18, 2026 · Jun 12, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/config/configs/applications.yaml b/config/configs/applications.yaml
@@ -55,6 +55,17 @@ applications:
               max_ongoing_requests: 6
               ray_actor_options:
                 num_gpus: 2
+            RTX_PRO_6000_BLACKWELL:
+              autoscaling_config:
+                max_replicas: {{.Replicas.Gemma431bIt}}
+                min_replicas: {{.Replicas.Gemma431bIt}}
+                target_ongoing_requests: 4
+              max_ongoing_requests: 10
+              ray_actor_options:
+                num_gpus: 2
+                resources:
+                  "gpu_count:2": 0.001
+                  "accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.Gemma431bIt}}
@@ -78,6 +89,14 @@ applications:
               max_num_batched_tokens: 4096
               max_num_seqs: 2
               tensor_parallel_size: 2
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              dtype: bfloat16
+              gpu_memory_utilization: 0.85
+              max_model_len: 240000
+              max_num_batched_tokens: 4096
+              max_num_seqs: 1
+              tensor_parallel_size: 2
         model_config:
           openai_serving_config:
             chat:
@@ -162,6 +181,17 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 1
+            RTX_PRO_6000_BLACKWELL:
+              autoscaling_config:
+                max_replicas: {{.Replicas.GptOss20b}}
+                min_replicas: {{.Replicas.GptOss20b}}
+                target_ongoing_requests: 4
+              max_ongoing_requests: 8
+              ray_actor_options:
+                num_gpus: 1
+                resources:
+                  "gpu_count:2": 0.001
+                  "accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.GptOss20b}}
@@ -251,6 +281,12 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 0.075
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.031
+                resources:
+                  "gpu_count:2": 0.001
+                  "accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.UaeLarge}}
@@ -266,6 +302,9 @@ applications:
           L40S:
             engine_args:
               gpu_memory_utilization: 0.075
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.031
         model_config:
           engine_args:
             gpu_memory_utilization: 0.15
@@ -317,6 +356,12 @@ applications:
             H100:
               ray_actor_options:
                 num_gpus: 0.005
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.004
+                resources:
+                  "gpu_count:2": 0.001
+                  "accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.AllMinilmL6V2}}
@@ -329,6 +374,9 @@ applications:
           H100:
             engine_args:
               gpu_memory_utilization: 0.005
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.004
         model_config:
           engine_args:
             gpu_memory_utilization: 0.01
@@ -380,6 +428,12 @@ applications:
             H100:
               ray_actor_options:
                 num_gpus: 0.005
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.004
+                resources:
+                  "gpu_count:2": 0.001
+                  "accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.BiEncoder}}
@@ -447,6 +501,12 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 0.1
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.05
+                resources:
+                  "gpu_count:2": 0.001
+                  "accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.MbartTranslator}}
@@ -500,6 +560,12 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 0.05
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.021
+                resources:
+                  "gpu_count:2": 0.001
+                  "accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.XlmRobertaLanguageClassifier}}
@@ -515,6 +581,9 @@ applications:
           L40S:
             engine_args:
               gpu_memory_utilization: 0.05
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.021
         model_config:
           engine_args:
             gpu_memory_utilization: 0.1
@@ -605,6 +674,12 @@ applications:
             H100:
               ray_actor_options:
                 num_gpus: 0.005
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.004
+                resources:
+                  "gpu_count:2": 0.001
+                  "accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.CrossEncoder}}
@@ -617,6 +692,9 @@ applications:
           H100:
             engine_args:
               gpu_memory_utilization: 0.005
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.004
         model_config:
           engine_args:
             gpu_memory_utilization: 0.01
@@ -672,6 +750,12 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 0.05
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.021
+                resources:
+                  "gpu_count:2": 0.001
+                  "accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.E5LanguageClassifier}}
@@ -687,6 +771,9 @@ applications:
           L40S:
             engine_args:
               gpu_memory_utilization: 0.05
+          RTX_PRO_6000_BLACKWELL:
+            engine_args:
+              gpu_memory_utilization: 0.021
         model_config:
           engine_args:
             gpu_memory_utilization: 0.1
@@ -741,6 +828,12 @@ applications:
             L40S:
               ray_actor_options:
                 num_gpus: 0.025
+            RTX_PRO_6000_BLACKWELL:
+              ray_actor_options:
+                num_gpus: 0.013
+                resources:
+                  "gpu_count:2": 0.001
+                  "accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
           options:
             autoscaling_config:
               max_replicas: {{.Replicas.PromptInjectionCrossEncoder}}

diff --git a/config/configs/features/saia.yaml b/config/configs/features/saia.yaml
@@ -23,4 +23,8 @@ instanceScale:
     h100-1-gpu: 2
   H100_NVL:
     h100-nvl-0-gpu: 1
-    h100-nvl-1-gpu: 2
+    h100-nvl-1-gpu: 2
+  RTX_PRO_6000_BLACKWELL:
+    rtx-pro-6000-blackwell-0-gpu: 1
+    rtx-pro-6000-blackwell-1-gpu: 0
+    rtx-pro-6000-blackwell-2-gpu: 1
diff --git a/config/configs/instance.yaml b/config/configs/instance.yaml
@@ -85,4 +85,37 @@ H100_NVL:
         cpu: "16"
         memory: "48Gi"
         ephemeral-storage: "100Gi"
-        nvidia.com/gpu: "1"
+        nvidia.com/gpu: "1"
+RTX_PRO_6000_BLACKWELL:
+  - tier: rtx-pro-6000-blackwell-0-gpu  # CPU-only tier (gpusPerPod: 0)
+    gpusPerPod: 0
+    env:
+      NVIDIA_VISIBLE_DEVICES: void  # NVIDIA container runtime: "void" exposes no GPUs to the container
+    resources:
+      requests:
+        cpu: "4"
+      limits:
+        cpu: "16"
+        memory: "24Gi"
+        ephemeral-storage: "50Gi"
+        nvidia.com/gpu: "0"
+  - tier: rtx-pro-6000-blackwell-1-gpu  # single-GPU tier
+    gpusPerPod: 1
+    resources:
+      requests:
+        cpu: "4"
+      limits:
+        cpu: "16"
+        memory: "48Gi"
+        ephemeral-storage: "200Gi"
+        nvidia.com/gpu: "1"
+  - tier: rtx-pro-6000-blackwell-2-gpu  # dual-GPU tier
+    gpusPerPod: 2
+    resources:
+      requests:
+        cpu: "4"
+      limits:
+        cpu: "8"  # NOTE(review): half the 1-gpu tier's "16" while memory/storage double — confirm intended
+        memory: "96Gi"
+        ephemeral-storage: "400Gi"
+        nvidia.com/gpu: "2"
diff --git a/tools/cluster_setup/openshift-cluster-config.yaml b/tools/cluster_setup/openshift-cluster-config.yaml
@@ -0,0 +1,105 @@
+# =============================================================================
+# OpenShift Cluster Config for Splunk AI Platform
+# =============================================================================
+# Used by openshift_with_stack.sh
+# =============================================================================
+
+kubernetes:
+  namespace: "ai-platform"
+
+# OpenShift-specific settings
+openshift:
+  # Grant privileged SCC to Ray worker and operator service accounts.
+  # Required when running GPU workloads (nvidia.com/gpu requests).
+  # Set to "false" only if your cluster policy already covers this.
+  grantPrivilegedSCC: "true"
+
+  # Node labeling for splunk.ai/* workload selectors.
+  # The operator schedules weaviate/ray-head on cpu nodes and Ray workers on gpu nodes.
+  # "auto": detect GPU nodes by the nvidia.com/gpu.present label (works when the GPU Operator is installed).
+  # "manual": use the node names listed explicitly under `nodes` below.
+  nodeLabelStrategy: "manual"
+
+  # Manual node lists: L40S nodes handle CPU workloads (weaviate, ray-head, saia-api);
+  # the RTX PRO 6000 Blackwell node is dedicated to GPU model pods (ray-worker).
+  nodes:
+    cpu:
+      - "00-25-b5-b5-00-31"
+      - "00-25-b5-b5-00-33"
+    gpu:
+      - "cc-40-f3-9f-e2-3c"
+
+images:
+  # Registry prefix applied to images that are not fully qualified (in this file: the ray and saia images).
+  registry: "658391232643.dkr.ecr.us-east-2.amazonaws.com"
+
+  operator:  # NOTE(review): image path contains "kiran/" — looks like a personal namespace; confirm before merging
+    image: "658391232643.dkr.ecr.us-east-2.amazonaws.com/kiran/splunk/splunk-ai-operator:openshift-0.5"
+
+  ray:  # no registry host in these references
+    headImage: "ml-platform/ray/ray-head:build-953"
+    workerImage: "ml-platform/ray/ray-worker-gpu:build-953"
+
+  weaviate:
+    image: "docker.io/semitechnologies/weaviate:stable-v1.28-007846a"
+
+  saia:  # no registry host in these references; all three share the same build tag
+    apiImage: "ml-platform/saia/saia-api:build-v2-main-c3b489d"
+    apiV2Image: "ml-platform/saia/saia-api-v2:build-v2-main-c3b489d"
+    dataLoaderImage: "ml-platform/saia/saia-data-loader:build-v2-main-c3b489d"
+
+  splunk:
+    image: "658391232643.dkr.ecr.us-east-2.amazonaws.com/splunk/splunk:10-2-ai-custom"
+    operatorImage: "docker.io/splunk/splunk-operator:3.0.0"
+
+  fluentBit:
+    image: "docker.io/fluent/fluent-bit:1.9.6"
+
+  otelCollector:
+    image: "docker.io/otel/opentelemetry-collector-contrib:0.122.1"
+
+  nginx:
+    image: "docker.io/library/nginx:1.27-alpine"
+
+storage:
+  storageClass: "local-path"
+  vectorDbSize: "50Gi"
+  objectStore:
+    type: "seaweedfs"           # one of: aws | s3compat | minio | seaweedfs
+    bucket: "ai-platform-bucket"
+    endpoint: "<S3_ENDPOINT_URL>"  # placeholder — replace with the real endpoint URL
+    auth:  # placeholder credentials — supply real values at install time; do not commit them
+      rootUser: "<S3_ACCESS_KEY_ID>"
+      rootPassword: "<S3_SECRET_ACCESS_KEY>"
+
+splunk:
+  standaloneName: "splunk-standalone"
+
+aiPlatform:
+  name: "openshift-ai-platform"
+  defaultAcceleratorType: "RTX_PRO_6000_BLACKWELL"  # same accelerator key used in instance.yaml and applications.yaml
+  workerGroupConfig:
+    imageRegistry: ""
+  serviceTemplate:
+    type: "NodePort"
+    nodePort: 30080  # inside Kubernetes' default NodePort range (30000-32767)
+  features:
+    - name: "saia"
+      version: "1.1.0"
+
+operators:
+  ray:
+    modelVersion: "v0.3.14-36-g1549f5a"  # NOTE(review): looks like a git-describe build id — confirm which artifact it pins
+    rayVersion: "2.53.0"
+
+files:  # NOTE(review): relative paths — confirm which directory openshift_with_stack.sh resolves them against
+  aiPlatform: "./artifacts.yaml"
+  splunkOperator: "./splunk-operator-cluster.yaml"
+
+# ECR pull secret — created automatically in all relevant namespaces during install.
+# Requires AWS credentials in the environment (e.g. AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
+# or an instance profile). Set enabled: false for non-ECR registries.
+ecr:
+  enabled: true
+  account: "658391232643"  # same account ID as the host in images.registry — keep in sync
+  region: "us-east-2"  # same region as the host in images.registry — keep in sync