From 3679f2874b8c85f4688ad3dad55fcd80197042d4 Mon Sep 17 00:00:00 2001 From: Rael Garcia Date: Thu, 16 Apr 2026 14:53:28 +0200 Subject: [PATCH 1/4] Add retry logic to ACR login in on-demand sync script The az acr login command can fail transiently with managed identity token errors (e.g. "JSON is invalid: Expecting value"). This wraps both ACR login calls with exponential backoff retry (up to 5 attempts) to handle these transient failures during Ev2 image mirroring. --- pipelines/types/on-demand.sh | 38 ++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/pipelines/types/on-demand.sh b/pipelines/types/on-demand.sh index b5e0ef16..747929f1 100644 --- a/pipelines/types/on-demand.sh +++ b/pipelines/types/on-demand.sh @@ -2,6 +2,22 @@ set -euo pipefail +retry() { + local retries="${1}" + shift + local count=0 + until "$@"; do + count=$((count + 1)) + if [[ "${count}" -ge "${retries}" ]]; then + echo "Command failed after ${retries} attempts: $*" + return 1 + fi + local wait=$((2 ** count)) + echo "Command failed (attempt ${count}/${retries}). Retrying in ${wait}s..." + sleep "${wait}" + done +} + copyImageFromRegistry() { # shortcut mirroring if the source registry is the same as the target ACR REQUIRED_REGISTRY_VARS=("TARGET_ACR" "SOURCE_REGISTRY") @@ -57,12 +73,15 @@ copyImageFromRegistry() { # ACR login to target registry echo "Logging into target ACR ${TARGET_ACR}." - if output="$( az acr login --name "${TARGET_ACR}" --expose-token --only-show-errors --output json 2>&1 )"; then - RESPONSE="${output}" - else - echo "Failed to log in to ACR ${TARGET_ACR}: ${output}" - exit 1 - fi + acr_login_target() { + if output="$( az acr login --name "${TARGET_ACR}" --expose-token --only-show-errors --output json 2>&1 )"; then + RESPONSE="${output}" + else + echo "Failed to log in to ACR ${TARGET_ACR}: ${output}" + return 1 + fi + } + retry 5 acr_login_target TARGET_ACR_LOGIN_SERVER="$(jq --raw-output .loginServer <<<"${RESPONSE}" )" oras login --registry-config "${AUTH_JSON}" \ --username 00000000-0000-0000-0000-000000000000 \ @@ -135,8 +154,11 @@ copyImageFromOciLayout() { echo "Getting the ACR access token." USERNAME="00000000-0000-0000-0000-000000000000" - PASSWORD=$(az acr login --name "$TARGET_ACR" --expose-token --output tsv --query accessToken) - + acr_login_oci() { + PASSWORD=$(az acr login --name "$TARGET_ACR" --expose-token --output tsv --query accessToken) + } + retry 5 acr_login_oci + echo "Logging in with ORAS." oras login $TARGET_ACR_LOGIN_SERVER --username $USERNAME --password-stdin <<< $PASSWORD From 784aabd371c3ceb48edf51e22b4d9a63cc665b6a Mon Sep 17 00:00:00 2001 From: Rael Garcia Date: Thu, 16 Apr 2026 15:05:08 +0200 Subject: [PATCH 2/4] Handle az acr login failure in copyImageFromOciLayout retry Without error handling, set -e causes the script to exit immediately on a transient az failure instead of letting the retry function catch it. Also add --only-show-errors to reduce noise during retries. --- pipelines/types/on-demand.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pipelines/types/on-demand.sh b/pipelines/types/on-demand.sh index 747929f1..480b8c2c 100644 --- a/pipelines/types/on-demand.sh +++ b/pipelines/types/on-demand.sh @@ -155,7 +155,10 @@ copyImageFromOciLayout() { echo "Getting the ACR access token." USERNAME="00000000-0000-0000-0000-000000000000" acr_login_oci() { - PASSWORD=$(az acr login --name "$TARGET_ACR" --expose-token --output tsv --query accessToken) + if ! PASSWORD=$(az acr login --name "$TARGET_ACR" --expose-token --only-show-errors --output tsv --query accessToken); then + echo "Failed to get ACR access token for ${TARGET_ACR}" + return 1 + fi } retry 5 acr_login_oci From b63d3bfc890cc961a8c37325996854e62890a85b Mon Sep 17 00:00:00 2001 From: Rael Garcia Date: Thu, 16 Apr 2026 16:10:22 +0200 Subject: [PATCH 3/4] Address review: use stderr for error/retry messages, rename wait variable - Send retry error and progress messages to stderr - Rename local variable from 'wait' to 'delay' to avoid shadowing the bash builtin --- pipelines/types/on-demand.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pipelines/types/on-demand.sh b/pipelines/types/on-demand.sh index 480b8c2c..84acfece 100644 --- a/pipelines/types/on-demand.sh +++ b/pipelines/types/on-demand.sh @@ -9,12 +9,12 @@ retry() { until "$@"; do count=$((count + 1)) if [[ "${count}" -ge "${retries}" ]]; then - echo "Command failed after ${retries} attempts: $*" + echo "Command failed after ${retries} attempts: $*" >&2 return 1 fi - local wait=$((2 ** count)) - echo "Command failed (attempt ${count}/${retries}). Retrying in ${wait}s..." - sleep "${wait}" + local delay=$((2 ** count)) + echo "Command failed (attempt ${count}/${retries}). Retrying in ${delay}s..." >&2 + sleep "${delay}" done } @@ -156,7 +156,7 @@ copyImageFromOciLayout() { USERNAME="00000000-0000-0000-0000-000000000000" acr_login_oci() { if ! PASSWORD=$(az acr login --name "$TARGET_ACR" --expose-token --only-show-errors --output tsv --query accessToken); then - echo "Failed to get ACR access token for ${TARGET_ACR}" + echo "Failed to get ACR access token for ${TARGET_ACR}" >&2 return 1 fi } From c29d7f059e34f0d16b6197da19fdbdfb0f7e8512 Mon Sep 17 00:00:00 2001 From: Jan-Hendrik Boll Date: Thu, 16 Apr 2026 16:46:00 +0000 Subject: [PATCH 4/4] Update pipelines/types/on-demand.sh --- pipelines/types/on-demand.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/types/on-demand.sh b/pipelines/types/on-demand.sh index 84acfece..dc024482 100644 --- a/pipelines/types/on-demand.sh +++ b/pipelines/types/on-demand.sh @@ -77,7 +77,7 @@ copyImageFromRegistry() { if output="$( az acr login --name "${TARGET_ACR}" --expose-token --only-show-errors --output json 2>&1 )"; then RESPONSE="${output}" else - echo "Failed to log in to ACR ${TARGET_ACR}: ${output}" + echo "Failed to log in to ACR ${TARGET_ACR}: ${output}" >&2 return 1 fi }