Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions config/configs/applications.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@ applications:
max_ongoing_requests: 6
ray_actor_options:
num_gpus: 2
RTX_PRO_6000_BLACKWELL:
autoscaling_config:
max_replicas: {{.Replicas.Gemma431bIt}}
min_replicas: {{.Replicas.Gemma431bIt}}
target_ongoing_requests: 4
max_ongoing_requests: 10
ray_actor_options:
num_gpus: 2
resources:
"gpu_count:2": 0.001
"accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
options:
autoscaling_config:
max_replicas: {{.Replicas.Gemma431bIt}}
Expand All @@ -78,6 +89,14 @@ applications:
max_num_batched_tokens: 4096
max_num_seqs: 2
tensor_parallel_size: 2
RTX_PRO_6000_BLACKWELL:
engine_args:
dtype: bfloat16
gpu_memory_utilization: 0.85
max_model_len: 240000
max_num_batched_tokens: 4096
max_num_seqs: 1
tensor_parallel_size: 2
model_config:
openai_serving_config:
chat:
Expand Down Expand Up @@ -162,6 +181,17 @@ applications:
L40S:
ray_actor_options:
num_gpus: 1
RTX_PRO_6000_BLACKWELL:
autoscaling_config:
max_replicas: {{.Replicas.GptOss20b}}
min_replicas: {{.Replicas.GptOss20b}}
target_ongoing_requests: 4
max_ongoing_requests: 8
ray_actor_options:
num_gpus: 1
resources:
"gpu_count:2": 0.001
"accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
options:
autoscaling_config:
max_replicas: {{.Replicas.GptOss20b}}
Expand Down Expand Up @@ -251,6 +281,12 @@ applications:
L40S:
ray_actor_options:
num_gpus: 0.075
RTX_PRO_6000_BLACKWELL:
ray_actor_options:
num_gpus: 0.031
resources:
"gpu_count:2": 0.001
"accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
options:
autoscaling_config:
max_replicas: {{.Replicas.UaeLarge}}
Expand All @@ -266,6 +302,9 @@ applications:
L40S:
engine_args:
gpu_memory_utilization: 0.075
RTX_PRO_6000_BLACKWELL:
engine_args:
gpu_memory_utilization: 0.031
model_config:
engine_args:
gpu_memory_utilization: 0.15
Expand Down Expand Up @@ -317,6 +356,12 @@ applications:
H100:
ray_actor_options:
num_gpus: 0.005
RTX_PRO_6000_BLACKWELL:
ray_actor_options:
num_gpus: 0.004
resources:
"gpu_count:2": 0.001
"accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
options:
autoscaling_config:
max_replicas: {{.Replicas.AllMinilmL6V2}}
Expand All @@ -329,6 +374,9 @@ applications:
H100:
engine_args:
gpu_memory_utilization: 0.005
RTX_PRO_6000_BLACKWELL:
engine_args:
gpu_memory_utilization: 0.004
model_config:
engine_args:
gpu_memory_utilization: 0.01
Expand Down Expand Up @@ -380,6 +428,12 @@ applications:
H100:
ray_actor_options:
num_gpus: 0.005
RTX_PRO_6000_BLACKWELL:
ray_actor_options:
num_gpus: 0.004
resources:
"gpu_count:2": 0.001
"accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
options:
autoscaling_config:
max_replicas: {{.Replicas.BiEncoder}}
Expand Down Expand Up @@ -447,6 +501,12 @@ applications:
L40S:
ray_actor_options:
num_gpus: 0.1
RTX_PRO_6000_BLACKWELL:
ray_actor_options:
num_gpus: 0.05
resources:
"gpu_count:2": 0.001
"accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
options:
autoscaling_config:
max_replicas: {{.Replicas.MbartTranslator}}
Expand Down Expand Up @@ -500,6 +560,12 @@ applications:
L40S:
ray_actor_options:
num_gpus: 0.05
RTX_PRO_6000_BLACKWELL:
ray_actor_options:
num_gpus: 0.021
resources:
"gpu_count:2": 0.001
"accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
options:
autoscaling_config:
max_replicas: {{.Replicas.XlmRobertaLanguageClassifier}}
Expand All @@ -515,6 +581,9 @@ applications:
L40S:
engine_args:
gpu_memory_utilization: 0.05
RTX_PRO_6000_BLACKWELL:
engine_args:
gpu_memory_utilization: 0.021
model_config:
engine_args:
gpu_memory_utilization: 0.1
Expand Down Expand Up @@ -605,6 +674,12 @@ applications:
H100:
ray_actor_options:
num_gpus: 0.005
RTX_PRO_6000_BLACKWELL:
ray_actor_options:
num_gpus: 0.004
resources:
"gpu_count:2": 0.001
"accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
options:
autoscaling_config:
max_replicas: {{.Replicas.CrossEncoder}}
Expand All @@ -617,6 +692,9 @@ applications:
H100:
engine_args:
gpu_memory_utilization: 0.005
RTX_PRO_6000_BLACKWELL:
engine_args:
gpu_memory_utilization: 0.004
model_config:
engine_args:
gpu_memory_utilization: 0.01
Expand Down Expand Up @@ -672,6 +750,12 @@ applications:
L40S:
ray_actor_options:
num_gpus: 0.05
RTX_PRO_6000_BLACKWELL:
ray_actor_options:
num_gpus: 0.021
resources:
"gpu_count:2": 0.001
"accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
options:
autoscaling_config:
max_replicas: {{.Replicas.E5LanguageClassifier}}
Expand All @@ -687,6 +771,9 @@ applications:
L40S:
engine_args:
gpu_memory_utilization: 0.05
RTX_PRO_6000_BLACKWELL:
engine_args:
gpu_memory_utilization: 0.021
model_config:
engine_args:
gpu_memory_utilization: 0.1
Expand Down Expand Up @@ -741,6 +828,12 @@ applications:
L40S:
ray_actor_options:
num_gpus: 0.025
RTX_PRO_6000_BLACKWELL:
ray_actor_options:
num_gpus: 0.013
resources:
"gpu_count:2": 0.001
"accelerator_type:RTX_PRO_6000_BLACKWELL": 0.001
options:
autoscaling_config:
max_replicas: {{.Replicas.PromptInjectionCrossEncoder}}
Expand Down
6 changes: 5 additions & 1 deletion config/configs/features/saia.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,8 @@ instanceScale:
h100-1-gpu: 2
H100_NVL:
h100-nvl-0-gpu: 1
h100-nvl-1-gpu: 2
h100-nvl-1-gpu: 2
RTX_PRO_6000_BLACKWELL:
rtx-pro-6000-blackwell-0-gpu: 1
rtx-pro-6000-blackwell-1-gpu: 0
rtx-pro-6000-blackwell-2-gpu: 1
35 changes: 34 additions & 1 deletion config/configs/instance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,37 @@ H100_NVL:
cpu: "16"
memory: "48Gi"
ephemeral-storage: "100Gi"
nvidia.com/gpu: "1"
nvidia.com/gpu: "1"
RTX_PRO_6000_BLACKWELL:
- tier: rtx-pro-6000-blackwell-0-gpu
gpusPerPod: 0
env:
NVIDIA_VISIBLE_DEVICES: void
resources:
limits:
Comment thread
Copilot marked this conversation as resolved.
cpu: "16"
memory: "24Gi"
ephemeral-storage: "50Gi"
nvidia.com/gpu: "0"
requests:
cpu: "4"
- tier: rtx-pro-6000-blackwell-1-gpu
gpusPerPod: 1
resources:
requests:
cpu: "4"
limits:
cpu: "16"
memory: "48Gi"
ephemeral-storage: "200Gi"
nvidia.com/gpu: "1"
- tier: rtx-pro-6000-blackwell-2-gpu
gpusPerPod: 2
resources:
requests:
cpu: "4"
limits:
cpu: "8"
memory: "96Gi"
ephemeral-storage: "400Gi"
nvidia.com/gpu: "2"
105 changes: 105 additions & 0 deletions tools/cluster_setup/openshift-cluster-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# =============================================================================
# OpenShift Cluster Config for Splunk AI Platform
# =============================================================================
# Used by openshift_with_stack.sh
# =============================================================================

# Kubernetes-level settings.
# NOTE(review): presumably the namespace that openshift_with_stack.sh installs
# the platform into — confirm against the script.
kubernetes:
namespace: ai-platform

# OpenShift-specific settings.
openshift:
# Grant the privileged SCC (Security Context Constraints) to the Ray worker and
# operator service accounts.
# Required when running GPU workloads (pods that request nvidia.com/gpu).
# Set to "false" only if your cluster policy already covers this.
# The value is a quoted string, not a YAML boolean.
# NOTE(review): presumably the setup script compares it as text — keep the quotes.
grantPrivilegedSCC: "true"

# Node labeling for the splunk.ai/* workload selectors.
# The operator schedules weaviate and ray-head on cpu nodes and Ray workers on gpu nodes.
# Use "auto" to detect GPU nodes by the nvidia.com/gpu.present label
# (only works when the GPU Operator is installed).
# Use "manual" to list node names explicitly under `nodes` below.
nodeLabelStrategy: "manual"

# Node names used by the "manual" strategy.
# cpu: the L40S nodes, which handle the CPU workloads (weaviate, ray-head, saia-api).
# gpu: the RTX PRO 6000 Blackwell node, dedicated to GPU model pods (ray-worker).
nodes:
cpu:
- 00-25-b5-b5-00-31
- 00-25-b5-b5-00-33
gpu:
- cc-40-f3-9f-e2-3c

# Container images used by the stack.
images:
# Registry prefix applied to images that are not fully qualified
# (the ray and saia entries below carry no registry host, so they rely on it).
registry: "658391232643.dkr.ecr.us-east-2.amazonaws.com"

# Fully qualified, so the registry prefix above is not applied.
# NOTE(review): the path contains "kiran/", which looks like a personal
# namespace — confirm this is the intended image before relying on it.
operator:
image: "658391232643.dkr.ecr.us-east-2.amazonaws.com/kiran/splunk/splunk-ai-operator:openshift-0.5"

# Not fully qualified: resolved against `registry`.
ray:
headImage: "ml-platform/ray/ray-head:build-953"
workerImage: "ml-platform/ray/ray-worker-gpu:build-953"

weaviate:
image: "docker.io/semitechnologies/weaviate:stable-v1.28-007846a"

# Not fully qualified: resolved against `registry`.
# All three saia images share the same build tag.
saia:
apiImage: "ml-platform/saia/saia-api:build-v2-main-c3b489d"
apiV2Image: "ml-platform/saia/saia-api-v2:build-v2-main-c3b489d"
dataLoaderImage: "ml-platform/saia/saia-data-loader:build-v2-main-c3b489d"

# The Splunk image comes from the ECR registry; the Splunk operator image
# comes from docker.io.
splunk:
image: "658391232643.dkr.ecr.us-east-2.amazonaws.com/splunk/splunk:10-2-ai-custom"
operatorImage: "docker.io/splunk/splunk-operator:3.0.0"

fluentBit:
image: "docker.io/fluent/fluent-bit:1.9.6"

otelCollector:
image: "docker.io/otel/opentelemetry-collector-contrib:0.122.1"

nginx:
image: "docker.io/library/nginx:1.27-alpine"

# Storage settings: storage class, vector DB volume size, and object store.
storage:
storageClass: "local-path"
vectorDbSize: "50Gi"
objectStore:
type: "seaweedfs" # aws | s3compat | minio | seaweedfs
bucket: "ai-platform-bucket"
# The angle-bracket values below are placeholders, not working settings.
# NOTE(review): presumably they must be replaced before running the installer —
# confirm; do not commit real credentials to version control.
endpoint: "<S3_ENDPOINT_URL>"
auth:
rootUser: "<S3_ACCESS_KEY_ID>"
rootPassword: "<S3_SECRET_ACCESS_KEY>"

# Splunk instance settings.
# NOTE(review): standaloneName is presumably the name given to the Splunk
# standalone resource — confirm against the setup script.
splunk:
standaloneName: splunk-standalone

# AI platform settings.
aiPlatform:
name: "openshift-ai-platform"
# Accelerator key selected by default.
# NOTE(review): presumably must match an accelerator key defined in the
# operator's instance/application configs — confirm.
defaultAcceleratorType: "RTX_PRO_6000_BLACKWELL"
workerGroupConfig:
# Intentionally an empty string here.
# NOTE(review): presumably falls back to the registry prefix configured under
# `images` — confirm.
imageRegistry: ""
# Service exposure. 30080 lies inside the default Kubernetes NodePort range
# (30000-32767).
serviceTemplate:
type: NodePort
nodePort: 30080
features:
- name: "saia"
version: "1.1.0"

# Version pins for the Ray operator section. Both values are quoted so they are
# always read as strings.
# NOTE(review): rayVersion presumably has to agree with the Ray version inside
# the ray-head / ray-worker images — confirm.
operators:
ray:
modelVersion: "v0.3.14-36-g1549f5a"
rayVersion: "2.53.0"

# Relative paths to manifest files.
# NOTE(review): presumably resolved relative to the setup script's working
# directory — confirm.
files:
aiPlatform: "./artifacts.yaml"
splunkOperator: "./splunk-operator-cluster.yaml"

# ECR pull secret: created automatically in all relevant namespaces during install.
# Requires AWS credentials in the environment (e.g. AWS_ACCESS_KEY_ID and
# AWS_SECRET_ACCESS_KEY, or an instance profile).
# Set `enabled: false` for non-ECR registries.
# The account and region below also appear in the `images.registry` hostname;
# keep the two in sync when either changes.
ecr:
enabled: true
account: "658391232643"
region: "us-east-2"
Loading