From 7be303f8735bb64ab7a97a15b2e165c70c7c1466 Mon Sep 17 00:00:00 2001 From: liyu-ma Date: Fri, 27 Mar 2026 12:27:01 +1000 Subject: [PATCH 1/2] Add default timeouts for cluster and node pool wait steps Set 60-minute timeout for WaitForClusterSucceeded and 30-minute timeout for WaitForNodePoolSucceeded. Also add timeoutInMinutes parameter to AzCli to allow passing through step-level timeouts. --- kcl/lib/steps/azure/az_cli.k | 3 ++- kcl/lib/steps/azure/wait_for_cluster_succeeded.k | 4 ++-- kcl/lib/steps/azure/wait_for_node_pool_succeeded.k | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/kcl/lib/steps/azure/az_cli.k b/kcl/lib/steps/azure/az_cli.k index 7517786e09..e9e838530c 100644 --- a/kcl/lib/steps/azure/az_cli.k +++ b/kcl/lib/steps/azure/az_cli.k @@ -1,6 +1,6 @@ import azure_pipelines.ap.steps -AzCli = lambda serviceConnection: str, name: str, script: str, condition: str = Undefined -> steps.Step { +AzCli = lambda serviceConnection: str, name: str, script: str, condition: str = Undefined, timeoutInMinutes: int = Undefined -> steps.Step { steps.Task { task = "AzureCLI@2" inputs = { @@ -11,5 +11,6 @@ AzCli = lambda serviceConnection: str, name: str, script: str, condition: str = } displayName = name condition = condition + timeoutInMinutes = timeoutInMinutes } } \ No newline at end of file diff --git a/kcl/lib/steps/azure/wait_for_cluster_succeeded.k b/kcl/lib/steps/azure/wait_for_cluster_succeeded.k index 7fc174383e..21ee03f73a 100644 --- a/kcl/lib/steps/azure/wait_for_cluster_succeeded.k +++ b/kcl/lib/steps/azure/wait_for_cluster_succeeded.k @@ -1,6 +1,6 @@ import azure_pipelines.ap.steps -WaitForClusterSucceeded = lambda serviceConnection: str, cluster: str, resourceGroup: str, subscription: str -> steps.Step { +WaitForClusterSucceeded = lambda serviceConnection: str, cluster: str, resourceGroup: str, subscription: str, timeoutInMinutes: int = 60 -> steps.Step { script = """ while true; do STATE=$(az aks show \\ @@ -22,5 +22,5 @@ while true; do sleep 30 done """ - AzCli(serviceConnection, "Wait for cluster to succeed", script) + AzCli(serviceConnection, "Wait for cluster to succeed", script, timeoutInMinutes=timeoutInMinutes) } diff --git a/kcl/lib/steps/azure/wait_for_node_pool_succeeded.k b/kcl/lib/steps/azure/wait_for_node_pool_succeeded.k index a988676915..7d72ac2b81 100644 --- a/kcl/lib/steps/azure/wait_for_node_pool_succeeded.k +++ b/kcl/lib/steps/azure/wait_for_node_pool_succeeded.k @@ -1,6 +1,6 @@ import azure_pipelines.ap.steps -WaitForNodePoolSucceeded = lambda serviceConnection: str, cluster: str, resourceGroup: str, subscription: str, poolName: str -> steps.Step { +WaitForNodePoolSucceeded = lambda serviceConnection: str, cluster: str, resourceGroup: str, subscription: str, poolName: str, timeoutInMinutes: int = 30 -> steps.Step { script = """ while true; do STATE=$(az aks nodepool show \\ @@ -23,5 +23,5 @@ while true; do sleep 30 done """ - AzCli(serviceConnection, "Wait for node pool ${poolName} to succeed", script) + AzCli(serviceConnection, "Wait for node pool ${poolName} to succeed", script, timeoutInMinutes=timeoutInMinutes) } From 15264f69a7c9b1d61f6ee79081748f3a712049a1 Mon Sep 17 00:00:00 2001 From: liyu-ma Date: Fri, 27 Mar 2026 12:52:56 +1000 Subject: [PATCH 2/2] Regenerate example pipeline --- kcl/example_pipeline/pipeline.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kcl/example_pipeline/pipeline.yaml b/kcl/example_pipeline/pipeline.yaml index f21b7927b4..1ed0981136 100644 --- a/kcl/example_pipeline/pipeline.yaml +++ b/kcl/example_pipeline/pipeline.yaml @@ -136,6 +136,7 @@ jobs: sleep 30 done displayName: Wait for cluster to succeed + timeoutInMinutes: 60 - task: AzureCLI@2 inputs: azureSubscription: Azure-for-Telescope-internal @@ -183,6 +184,7 @@ jobs: sleep 30 done displayName: Wait for node pool cl2pool to succeed + timeoutInMinutes: 30 - task: AzureCLI@2 inputs: azureSubscription: Azure-for-Telescope-internal