From f01a047d0a64882eef4ffe1590b1e3c85d51fd00 Mon Sep 17 00:00:00 2001 From: imbajin Date: Sun, 22 Mar 2026 23:29:14 +0800 Subject: [PATCH 1/3] fix(workflows): clean temp arch tags after merge - add Docker Hub tag cleanup after successful manifest publish in pd/store/server flow - keep partial amd64 availability when arm64 fails by cleaning only in publish_manifest job - document the critical pd/store/server release pipeline with an ASCII workflow diagram - align AGENTS.md with the staged dual-arch publish and cleanup semantics --- .../_publish_pd_store_server_reusable.yml | 58 +++++++++++++++++++ AGENTS.md | 9 ++- README.md | 43 ++++++++++++++ 3 files changed, 107 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_publish_pd_store_server_reusable.yml b/.github/workflows/_publish_pd_store_server_reusable.yml index b259bcd..5cb73d0 100644 --- a/.github/workflows/_publish_pd_store_server_reusable.yml +++ b/.github/workflows/_publish_pd_store_server_reusable.yml @@ -481,6 +481,64 @@ jobs: run: | docker buildx imagetools inspect "${{ steps.tags.outputs.image_final }}" + - name: Delete temporary arch tags after manifest publish (${{ matrix.module }}) + env: + IMAGE_REPO: ${{ matrix.image_repo }} + VERSION_TAG: ${{ env.VERSION_TAG }} + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_PASSWORD: ${{ secrets.DOCKERHUB_PASSWORD }} + run: | + set -euo pipefail + + namespace="${IMAGE_REPO%%/*}" + repository="${IMAGE_REPO#*/}" + if [ "$namespace" = "$repository" ]; then + echo "Invalid image repo format: $IMAGE_REPO" + exit 1 + fi + + auth_token="$( + curl --fail-with-body -sS -X POST "https://hub.docker.com/v2/users/login/" \ + -H "Content-Type: application/json" \ + -d '{"username":"'"$DOCKERHUB_USERNAME"'","password":"'"$DOCKERHUB_PASSWORD"'"}' \ + | jq -r '.token' + )" + if [ -z "$auth_token" ] || [ "$auth_token" = "null" ]; then + echo "Failed to get Docker Hub auth token" + exit 1 + fi + + delete_tag_with_retry() { + local tag="$1" + local 
attempt=1 + local max_attempts=5 + while [ "$attempt" -le "$max_attempts" ]; do + status_code="$( + curl -sS -o /tmp/dockerhub-delete-response.txt -w "%{http_code}" -X DELETE \ + -H "Authorization: JWT $auth_token" \ + "https://hub.docker.com/v2/repositories/${namespace}/${repository}/tags/${tag}/" + )" + + if [ "$status_code" = "204" ] || [ "$status_code" = "404" ]; then + echo "Tag ${IMAGE_REPO}:${tag} delete status: $status_code" + return 0 + fi + + if [ "$attempt" -lt "$max_attempts" ]; then + echo "Delete ${IMAGE_REPO}:${tag} failed with HTTP ${status_code}, retrying (${attempt}/${max_attempts})" + sleep $((attempt * 5)) + fi + attempt=$((attempt + 1)) + done + + echo "Delete ${IMAGE_REPO}:${tag} failed after ${max_attempts} attempts" + cat /tmp/dockerhub-delete-response.txt || true + return 1 + } + + delete_tag_with_retry "${VERSION_TAG}-amd64" + delete_tag_with_retry "${VERSION_TAG}-arm64" + update_latest_hash: needs: [prepare, publish_manifest] if: ${{ inputs.mode == 'latest' && inputs.enable_hash_gate && needs.prepare.outputs.need_update == 'true' && needs.publish_manifest.result == 'success' }} diff --git a/AGENTS.md b/AGENTS.md index fb76b02..036912f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,8 +9,9 @@ Its main purpose is to publish Docker images, validate releases, and host small - `latest` publishing is the automated path: scheduled or manually triggered, with hash gating to skip unchanged sources. - `release` publishing is the manual path: it publishes from a versioned branch and should run even if the source is unchanged. -- Shared image publishing logic lives in [`.github/workflows/_publish_image_reusable.yml`](./.github/workflows/_publish_image_reusable.yml). -- Thin `publish_latest_*.yml` and `publish_release_*.yml` files are wrappers that define trigger policy and per-image inputs. +- Most image publishers share [`.github/workflows/_publish_image_reusable.yml`](./.github/workflows/_publish_image_reusable.yml). 
+- `pd/store/server` uses [`.github/workflows/_publish_pd_store_server_reusable.yml`](./.github/workflows/_publish_pd_store_server_reusable.yml) with strict precheck and staged amd64/arm64 -> manifest flow. +- In the pd/store/server path, temporary `*-amd64` and `*-arm64` tags are cleaned only after a successful manifest publish. ## Editing Rules @@ -18,6 +19,9 @@ Its main purpose is to publish Docker images, validate releases, and host small - Keep wrapper workflows thin and explicit. - Do not merge `latest` and `release` wrappers unless the trigger semantics are truly identical. - Keep special-case workflows separate when they need extra prechecks, custom ordering, or non-standard release flow. +- For pd/store/server changes, preserve this intent: + - arm64 failure should not erase already published amd64 artifacts + - only full dual-arch success should trigger manifest + temporary tag cleanup ## Important Files @@ -31,4 +35,3 @@ Its main purpose is to publish Docker images, validate releases, and host small - Read the relevant workflow and the reusable workflow together. - Preserve existing trigger semantics unless the task explicitly asks for a behavioral change. - Check whether the workflow is a standard publisher or a legacy / special-case flow before refactoring. - diff --git a/README.md b/README.md index 40ebf31..ffc511a 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,48 @@ The two publishing modes behave differently: - always publishes when invoked - derives the image tag from the release branch version +## Critical Path: PD/Store/Server + +`pd/store/server` is the most important publishing flow in this repository and uses a dedicated reusable workflow: +[`.github/workflows/_publish_pd_store_server_reusable.yml`](./.github/workflows/_publish_pd_store_server_reusable.yml). 
+ +```text + source branch (master / release-x.y.z) + | + v + prepare job + (resolve source SHA, version tag, hash gate) + | + v + integration_precheck (optional) + (compose health check for pd/store/server-hstore) + | + v + publish_amd64 (matrix x4 modules) + +-------------------------------------------------+ + | pd | store | server-hstore | server-standalone | + +-------------------------------------------------+ + push x.y.z-amd64 (or latest-amd64) + | + v + publish_arm64 (matrix x4 modules) + push x.y.z-arm64 (or latest-arm64) + | + v + publish_manifest (matrix x4 modules) + merge amd64+arm64 => x.y.z (or latest) manifest + then delete temporary -amd64 / -arm64 tags + | + v + update_latest_hash (latest mode only, optional) +``` + +Tag behavior: + +- If only amd64 is published and arm64 fails, manifest is not created and `*-amd64` remains available. +- If both amd64 and arm64 succeed, manifest publish runs and then removes temporary `*-amd64` and `*-arm64` tags. +- End users should primarily use `latest` or release version tags (`x.y.z`). + ## Why The Wrappers Stay Split Although the `latest` and `release` wrappers look similar, they encode different release semantics. @@ -84,6 +126,7 @@ Reusable workflows are the real implementation layer. - strict integration precheck for pd/store/server (hstore backend, `hugegraph/server`) - staged image publication with `*-amd64` then `*-arm64` - manifest merge to final tag (`latest` or release version) +- remove temporary `*-amd64` and `*-arm64` tags after successful manifest publish - standalone server smoke test for `hugegraph/hugegraph` Wrapper workflows provide the source repository, branch, and mode-specific inputs. 
From 6f3450db1cb625bf98fc98cfd6ef5b49968185ef Mon Sep 17 00:00:00 2001 From: imbajin Date: Tue, 24 Mar 2026 14:48:31 +0800 Subject: [PATCH 2/3] fix(workflows): harden cleanup and clarify docs - make temporary arch-tag cleanup non-blocking after manifest publish - build Docker Hub login payload with jq-safe JSON encoding - fix delete retry loop to handle curl network errors under set -e - clarify pd/store/server docs on master default branch and staged amd64->arm64 behavior --- .../_publish_pd_store_server_reusable.yml | 16 +++++++++++----- README.md | 8 ++++++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/.github/workflows/_publish_pd_store_server_reusable.yml b/.github/workflows/_publish_pd_store_server_reusable.yml index 5cb73d0..e7f8d5b 100644 --- a/.github/workflows/_publish_pd_store_server_reusable.yml +++ b/.github/workflows/_publish_pd_store_server_reusable.yml @@ -482,6 +482,7 @@ jobs: docker buildx imagetools inspect "${{ steps.tags.outputs.image_final }}" - name: Delete temporary arch tags after manifest publish (${{ matrix.module }}) + continue-on-error: true env: IMAGE_REPO: ${{ matrix.image_repo }} VERSION_TAG: ${{ env.VERSION_TAG }} @@ -497,10 +498,12 @@ jobs: exit 1 fi + login_payload="$(jq -nc --arg u "$DOCKERHUB_USERNAME" --arg p "$DOCKERHUB_PASSWORD" '{username:$u,password:$p}')" auth_token="$( - curl --fail-with-body -sS -X POST "https://hub.docker.com/v2/users/login/" \ - -H "Content-Type: application/json" \ - -d '{"username":"'"$DOCKERHUB_USERNAME"'","password":"'"$DOCKERHUB_PASSWORD"'"}' \ + printf '%s' "$login_payload" \ + | curl --fail-with-body -sS -X POST "https://hub.docker.com/v2/users/login/" \ + -H "Content-Type: application/json" \ + --data-binary @- \ | jq -r '.token' )" if [ -z "$auth_token" ] || [ "$auth_token" = "null" ]; then @@ -513,11 +516,14 @@ jobs: local attempt=1 local max_attempts=5 while [ "$attempt" -le "$max_attempts" ]; do - status_code="$( + if ! 
status_code="$( curl -sS -o /tmp/dockerhub-delete-response.txt -w "%{http_code}" -X DELETE \ -H "Authorization: JWT $auth_token" \ "https://hub.docker.com/v2/repositories/${namespace}/${repository}/tags/${tag}/" - )" + )"; then + status_code="000" + echo "Delete ${IMAGE_REPO}:${tag} failed to reach Docker Hub (curl error)" + fi if [ "$status_code" = "204" ] || [ "$status_code" = "404" ]; then echo "Tag ${IMAGE_REPO}:${tag} delete status: $status_code" diff --git a/README.md b/README.md index ffc511a..616b7c5 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ standard single-image flow pd/store/server specialized flow The two publishing modes behave differently: - `latest` mode - - scheduled or ad-hoc publish for the current main branch line + - scheduled or ad-hoc publish for the current default branch line (`master` in `apache/hugegraph`) - skips work when the source hash has not changed - updates the stored `LAST_*_HASH` variable after a successful publish @@ -83,10 +83,14 @@ The two publishing modes behave differently: Tag behavior: -- If only amd64 is published and arm64 fails, manifest is not created and `*-amd64` remains available. +- If the `amd64` publish succeeds but the `arm64` publish fails, the manifest is not created and the `*-amd64` tag remains available. - If both amd64 and arm64 succeed, manifest publish runs and then removes temporary `*-amd64` and `*-arm64` tags. - End users should primarily use `latest` or release version tags (`x.y.z`). +Execution note: + +- `publish_arm64` runs after `publish_amd64` by design, so x86-64 (amd64) users can get a usable image earlier and arm64 compute is not spent when amd64 fails. + ## Why The Wrappers Stay Split Although the `latest` and `release` wrappers look similar, they encode different release semantics.
From 251d797c24d045f68e6e15031cd51389ad6a1793 Mon Sep 17 00:00:00 2001 From: imbajin Date: Tue, 24 Mar 2026 14:57:25 +0800 Subject: [PATCH 3/3] fix(workflows): attempt both arch tag cleanups - make temporary tag cleanup best-effort for both amd64 and arm64 - avoid early abort when the first deletion fails under set -e - keep warning visibility by exiting non-zero when any cleanup fails - preserve non-blocking behavior via continue-on-error in the cleanup step --- .../_publish_pd_store_server_reusable.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_publish_pd_store_server_reusable.yml b/.github/workflows/_publish_pd_store_server_reusable.yml index e7f8d5b..26a65aa 100644 --- a/.github/workflows/_publish_pd_store_server_reusable.yml +++ b/.github/workflows/_publish_pd_store_server_reusable.yml @@ -542,8 +542,22 @@ jobs: return 1 } - delete_tag_with_retry "${VERSION_TAG}-amd64" - delete_tag_with_retry "${VERSION_TAG}-arm64" + cleanup_failures=0 + + if ! delete_tag_with_retry "${VERSION_TAG}-amd64"; then + echo "Warning: failed to delete ${IMAGE_REPO}:${VERSION_TAG}-amd64" + cleanup_failures=1 + fi + + if ! delete_tag_with_retry "${VERSION_TAG}-arm64"; then + echo "Warning: failed to delete ${IMAGE_REPO}:${VERSION_TAG}-arm64" + cleanup_failures=1 + fi + + if [ "$cleanup_failures" -ne 0 ]; then + echo "Temporary arch-tag cleanup completed with warnings" + exit 1 + fi update_latest_hash: needs: [prepare, publish_manifest]