diff --git a/aks-automatic/deploy.sh b/aks-automatic/deploy.sh index eb266ae..cf8407b 100755 --- a/aks-automatic/deploy.sh +++ b/aks-automatic/deploy.sh @@ -86,7 +86,7 @@ helm upgrade \ --timeout 10m0s \ --namespace "$kuberay_namespace" \ --create-namespace kuberay-operator kuberay/kuberay-operator \ ---version 1.1.1 +--version 1.6.0 # Output the pods in the kuberay namespace kubectl get pods -n $kuberay_namespace diff --git a/aks-automatic/main.tf b/aks-automatic/main.tf index d7ad774..f44fc39 100644 --- a/aks-automatic/main.tf +++ b/aks-automatic/main.tf @@ -19,7 +19,7 @@ resource "tls_private_key" "ssh_key" { } resource "azapi_resource" "aks_auto" { - type = "Microsoft.ContainerService/managedClusters@2024-06-02-preview" + type = "Microsoft.ContainerService/managedClusters@2026-01-01" name = "aks-${var.project_prefix}-${random_string.suffix.result}" parent_id = azurerm_resource_group.rg.id location = var.location @@ -28,7 +28,7 @@ resource "azapi_resource" "aks_auto" { body = jsonencode({ properties = { - kubernetesVersion = "1.31" + kubernetesVersion = "1.33" nodeResourceGroup = "MC-aks-${var.project_prefix}-${random_string.suffix.result}" agentPoolProfiles = [ { @@ -97,7 +97,7 @@ resource "null_resource" "wait_for_aks" { } resource "azapi_update_resource" "k8s-default-node-pool-systempool-taint" { - type = "Microsoft.ContainerService/managedClusters@2024-06-02-preview" + type = "Microsoft.ContainerService/managedClusters@2026-01-01" resource_id = azapi_resource.aks_auto.id body = jsonencode({ properties = { diff --git a/aks-automatic/ray-job.pytorch-mnist-persist-logs.yaml b/aks-automatic/ray-job.pytorch-mnist-persist-logs.yaml index 2a73835..a465aa3 100644 --- a/aks-automatic/ray-job.pytorch-mnist-persist-logs.yaml +++ b/aks-automatic/ray-job.pytorch-mnist-persist-logs.yaml @@ -38,7 +38,7 @@ spec: CPUS_PER_WORKER: "2" # rayClusterSpec specifies the RayCluster instance to be created by the RayJob controller. rayClusterSpec: - rayVersion: '2.41.0' + rayVersion: '2.54.1' headGroupSpec: rayStartParams: {} # Pod template @@ -46,7 +46,7 @@ spec: spec: containers: - name: ray-head - image: rayproject/ray:2.41.0 + image: rayproject/ray:2.54.1 ports: - containerPort: 6379 name: gcs-server @@ -104,7 +104,7 @@ spec: spec: containers: - name: ray-worker - image: rayproject/ray:2.41.0 + image: rayproject/ray:2.54.1 resources: limits: cpu: "3" diff --git a/aks-classic/deploy.sh b/aks-classic/deploy.sh index 01113bd..faadec8 100755 --- a/aks-classic/deploy.sh +++ b/aks-classic/deploy.sh @@ -53,7 +53,7 @@ helm upgrade \ --timeout 10m0s \ --namespace "$kuberay_namespace" \ --create-namespace kuberay-operator kuberay/kuberay-operator \ ---version 1.1.1 +--version 1.6.0 # Output the pods in the kuberay namespace kubectl get pods -n $kuberay_namespace diff --git a/aks-classic/main.tf b/aks-classic/main.tf index e7e6052..c8042fe 100644 --- a/aks-classic/main.tf +++ b/aks-classic/main.tf @@ -27,10 +27,11 @@ resource "azurerm_kubernetes_cluster" "k8s" { } default_node_pool { - name = "systempool" - vm_size = var.system_node_pool_vm_size - node_count = var.system_node_pool_node_count - tags = { owner = var.resource_group_owner } + name = "systempool" + vm_size = var.system_node_pool_vm_size + node_count = var.system_node_pool_node_count + only_critical_addons_enabled = true + tags = { owner = var.resource_group_owner } } linux_profile { @@ -71,7 +72,7 @@ resource "null_resource" "wait_for_aks" { } resource "azapi_update_resource" "k8s-default-node-pool-systempool-taint" { - type = "Microsoft.ContainerService/managedClusters@2024-09-02-preview" + type = "Microsoft.ContainerService/managedClusters@2026-01-01" resource_id = azurerm_kubernetes_cluster.k8s.id body = jsonencode({ properties = { diff --git a/sample-tuning-setup/direct-blob-access/deploy.sh b/sample-tuning-setup/direct-blob-access/deploy.sh index 74a8045..78dc849 100755 --- a/sample-tuning-setup/direct-blob-access/deploy.sh +++ b/sample-tuning-setup/direct-blob-access/deploy.sh @@ -255,7 +255,7 @@ fi echo "Deploying KubeRay Operator in ${KUBERAY_NAMESPACE} namespace" helm repo add kuberay https://ray-project.github.io/kuberay-helm/ -helm install kuberay-operator kuberay/kuberay-operator --version 1.4.2 --namespace ${KUBERAY_NAMESPACE} --create-namespace +helm install kuberay-operator kuberay/kuberay-operator --version 1.6.0 --namespace ${KUBERAY_NAMESPACE} --create-namespace # Output the pods in the kuberay namespace kubectl get pods -n ${KUBERAY_NAMESPACE} diff --git a/sample-tuning-setup/direct-blob-access/rayjob_direct_access.tpl b/sample-tuning-setup/direct-blob-access/rayjob_direct_access.tpl index 05188bf..be3237c 100644 --- a/sample-tuning-setup/direct-blob-access/rayjob_direct_access.tpl +++ b/sample-tuning-setup/direct-blob-access/rayjob_direct_access.tpl @@ -13,7 +13,7 @@ spec: # Uncomment the next line to experiment with autoscaling. # enableInTreeAutoscaling: true # The version of Ray you are using. Make sure all Ray containers are running this version of Ray. - rayVersion: '2.43.0' + rayVersion: '2.54.1' headGroupSpec: # Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer' serviceType: ClusterIP diff --git a/sample-tuning-setup/direct-blob-access/variables.tf b/sample-tuning-setup/direct-blob-access/variables.tf index 58d2d77..2352669 100644 --- a/sample-tuning-setup/direct-blob-access/variables.tf +++ b/sample-tuning-setup/direct-blob-access/variables.tf @@ -22,7 +22,7 @@ variable "project_prefix" { variable "azure_kubernetes_version" { description = "Version of the azure kubernetes" - default = "1.33.0" + default = "1.33" type = string } @@ -96,7 +96,7 @@ variable "azure_storage_profile" { variable "kuberay_version" { description = "Kuberay version that needs to be installed" type = string - default = "1.4.2" + default = "1.6.0" } variable "kuberay_scrape_config_path" { diff --git a/sample-tuning-setup/rayjob.yaml b/sample-tuning-setup/rayjob.yaml index 14f67fa..07746bf 100644 --- a/sample-tuning-setup/rayjob.yaml +++ b/sample-tuning-setup/rayjob.yaml @@ -12,7 +12,7 @@ spec: # Uncomment the next line to experiment with autoscaling. # enableInTreeAutoscaling: true # The version of Ray you are using. Make sure all Ray containers are running this version of Ray. - rayVersion: '2.43.0' + rayVersion: '2.54.1' headGroupSpec: # Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer' serviceType: ClusterIP @@ -24,7 +24,7 @@ spec: containers: # The Ray head container - name: ray-head - image: rayproject/ray-ml:2.43.0.84f276-py310-cpu + image: rayproject/ray:2.54.1 imagePullPolicy: Always # Optimal resource allocation will depend on your Kubernetes infrastructure and might # require some experimentation. @@ -67,7 +67,7 @@ spec: spec: containers: - name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc') - image: rayproject/ray-ml:2.43.0.84f276-py310-cpu + image: rayproject/ray:2.54.1 # Optimal resource allocation will depend on your Kubernetes infrastructure and might # require some experimentation. # Setting requests=limits is recommended with Ray. K8s limits are used for Ray-internal diff --git a/sample-tuning-setup/terraform/variables.tf b/sample-tuning-setup/terraform/variables.tf index 50f3701..79b3f8b 100644 --- a/sample-tuning-setup/terraform/variables.tf +++ b/sample-tuning-setup/terraform/variables.tf @@ -22,7 +22,7 @@ variable "project_prefix" { variable "azure_kubernetes_version" { description = "Version of the azure kubernetes" - default = "1.32" + default = "1.33" type = string } @@ -96,7 +96,7 @@ variable "azure_storage_profile" { variable "kuberay_version" { description = "Kuberay version that needs to be installed" type = string - default = "1.4.2" + default = "1.6.0" } variable "kuberay_persistent_volume_claim_name" {