From 2e12f0f502593411f5df6d6d7f5e32be8b984175 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Fri, 10 Apr 2026 15:47:04 -0700 Subject: [PATCH 1/6] Add GH Action workflow to update downstream repos Searches the Nextstrain GitHub org to find repos that have the `.gitrepo` file with the nextstrain/shared remote to create a matrix of repos to potentially update. Installs and uses `git subrepo` to pull in the latest changes with the `--force` flag to avoid merge conflicts due to rebasing in the downstream repo. If there are changes pulled down, then `git subrepo` will create a single commit. If there is a single commit, then push up the changes to a branch and create or update the PR in the downstream repo. Nothing happens if there were no changes and workflow exits with error if it encounters more than one commit. --- .../workflows/update-downstream-repos.yaml | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 .github/workflows/update-downstream-repos.yaml diff --git a/.github/workflows/update-downstream-repos.yaml b/.github/workflows/update-downstream-repos.yaml new file mode 100644 index 0000000..73d215b --- /dev/null +++ b/.github/workflows/update-downstream-repos.yaml @@ -0,0 +1,102 @@ +name: Update downstream repos + +on: + push: + branches: + - main + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: true + +jobs: + build-downstream-matrix: + runs-on: ubuntu-latest + outputs: + downstream-matrix: ${{ steps.downstream-matrix.outputs.downstream-matrix }} + steps: + - id: downstream-matrix + env: + GH_TOKEN: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_REPO }} + # Create an array of potential repos to update that will be used as the + # matrix to the next job. + # [ + # { "repo": "nextstrain/zika", "path": "shared/vendored"}, + # { "repo": "nextstrain/mpox", "path": "shared/vendored"}, + # ... 
+ # ] + run: | + matrix=$(gh api -X GET search/code \ + -f q='org:nextstrain filename:.gitrepo "remote = https://github.com/nextstrain/shared"' \ + | jq -c ' + .items + | map({ + "repo": "\(.repository.full_name)", + "path": "\(.path | split("/")[0:-1] | join("/"))" + }) + ') + echo "downstream-matrix=$matrix" | tee -a "$GITHUB_OUTPUT" + update-downstream: + name: update-downstream (${{ matrix.repo }}, ${{ matrix.path }}) + needs: [build-downstream-matrix] + strategy: + fail-fast: false + matrix: + include: ${{ fromJson(needs.build-downstream-matrix.outputs.downstream-matrix) }} + env: + GIT_SUBREPO_DIR: .git/git-subrepo + VENDORED_PATH: ${{ matrix.path }} + branch: nextstrain-bot/update-vendored + runs-on: ubuntu-latest + steps: + - name: Checkout ${{ matrix.repo }} + uses: actions/checkout@v6 + with: + repository: ${{ matrix.repo }} + token: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_REPO }} + # Checkout git-subrepo _after_ the downstream repo to ensure that we + # keep it in a path within the downstream repo that does not interfere + # with the subrepo changes + - name: Checkout git-subrepo + uses: actions/checkout@v6 + with: + repository: "ingydotnet/git-subrepo" + path: ${{ env.GIT_SUBREPO_DIR }} + - name: Add git-subrepo to PATH + run: echo "$GIT_SUBREPO_DIR/lib" >> "$GITHUB_PATH" + - name: Update vendored path + run: | + git config user.name "${{ vars.GIT_USER_NAME_NEXTSTRAIN_BOT }}" + git config user.email "${{ vars.GIT_USER_EMAIL_NEXTSTRAIN_BOT }}" + + git switch -c "$branch" + git subrepo pull "$VENDORED_PATH" --force + - name: Create pull request + env: + GH_TOKEN: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_REPO }} + title: '[bot] Update ${{ env.VENDORED_PATH }}' + body: | + This PR was automatically created by https://github.com/nextstrain/shared/actions/runs/${{ github.run_id }} + to update the vendored subrepo in ${{ env.VENDORED_PATH }}. + + Subrepo updates were made with the `--force` flag so it overwrites any local changes in the subrepo. 
+ run: | + default_branch=$(git remote show origin | sed -n '/HEAD branch/s/.*: //p') + changes=$(git rev-list --count "$default_branch".."$branch") + if [[ "$changes" == "1" ]]; then + git push --force origin HEAD + pr_url=$(gh pr list --head "$branch" --json url | jq -r '.[0].url') + + if [[ "$pr_url" == "null" ]]; then + pr_url="$(gh pr create --head "$branch" --title "$title" --body "$body")" + echo "Pull request created: $pr_url" >> "$GITHUB_STEP_SUMMARY" + else + echo "Pull request updated: $pr_url" >> "$GITHUB_STEP_SUMMARY" + fi + elif [[ "$changes" == "0" ]]; then + echo "No pull request created or updated because no changes were made" >> "$GITHUB_STEP_SUMMARY" + else + echo "ERROR: Encountered an unexpected number of changes: $changes" + exit 1 + fi From 6adaca94ba7984d126bd79ba53781d52479f8227 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Tue, 14 Apr 2026 14:46:37 -0700 Subject: [PATCH 2/6] .github/update-downstream-repos: Include old nextstrain/ingest I was unable to get the search/code API to work with the 'OR' syntax so just added a separate query for nextstrain/ingest and concatenated the two arrays. Deduplicated the final array to guard against potential overlap. --- .../workflows/update-downstream-repos.yaml | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update-downstream-repos.yaml b/.github/workflows/update-downstream-repos.yaml index 73d215b..43f8c38 100644 --- a/.github/workflows/update-downstream-repos.yaml +++ b/.github/workflows/update-downstream-repos.yaml @@ -27,7 +27,7 @@ jobs: # ... 
# ] run: | - matrix=$(gh api -X GET search/code \ + shared_matrix=$(gh api -X GET search/code \ -f q='org:nextstrain filename:.gitrepo "remote = https://github.com/nextstrain/shared"' \ | jq -c ' .items @@ -36,6 +36,28 @@ jobs: "path": "\(.path | split("/")[0:-1] | join("/"))" }) ') + + # I was unable to get the 'OR' syntax to work with the search/code API, + # so making a separate query for the old nextstrain/ingest repo name. + # -Jover, 14 Apr 2026. + ingest_matrix=$(gh api -X GET search/code \ + -f q='org:nextstrain filename:.gitrepo "remote = https://github.com/nextstrain/ingest"' \ + | jq -c ' + .items + | map({ + "repo": "\(.repository.full_name)", + "path": "\(.path | split("/")[0:-1] | join("/"))" + }) + ') + + # There should not be any overlap between `shared_matrix` and `ingest_matrix` + # but deduplicating with `unique` just in case. + # -Jover, 14 Apr 2026. + matrix=$(jq -n \ + --argjson matrix1 "$shared_matrix" \ + --argjson matrix2 "$ingest_matrix" \ + -c '$matrix1 + $matrix2 | sort_by(.repo, .path) | unique') + echo "downstream-matrix=$matrix" | tee -a "$GITHUB_OUTPUT" update-downstream: name: update-downstream (${{ matrix.repo }}, ${{ matrix.path }}) From 29090c76346c00409e806684783f3d5649a6c6e8 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 15 Apr 2026 14:02:51 -0700 Subject: [PATCH 3/6] .github/update-downstream-repos: dedup matrix by repo We should only be keeping a single copy of the vendored repo in each downstream repo, so deduplicate the matrix by repo. In cases where there are multiple copies, we are prioritizing the `nextstrain/shared` remote since that is the newer version. This is prompted by the error in the workflow when avian-flu had two paths to update. 
--- .github/workflows/update-downstream-repos.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/update-downstream-repos.yaml b/.github/workflows/update-downstream-repos.yaml index 43f8c38..3353ca8 100644 --- a/.github/workflows/update-downstream-repos.yaml +++ b/.github/workflows/update-downstream-repos.yaml @@ -50,13 +50,15 @@ jobs: }) ') - # There should not be any overlap between `shared_matrix` and `ingest_matrix` - # but deduplicating with `unique` just in case. - # -Jover, 14 Apr 2026. + # Deduplicate by repo since each repo should only have a single copy + # of the vendored repo. In cases where a repo has both, + # we are prioritizing the nextstrain/shared remote since that is + # the newer repo. + # -Jover, 15 Apr 2026. matrix=$(jq -n \ --argjson matrix1 "$shared_matrix" \ --argjson matrix2 "$ingest_matrix" \ - -c '$matrix1 + $matrix2 | sort_by(.repo, .path) | unique') + -c '$matrix1 + $matrix2 | unique_by(.repo)') echo "downstream-matrix=$matrix" | tee -a "$GITHUB_OUTPUT" update-downstream: From 09ad253681dcbd6494627c626eeee07f060103fe Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Thu, 16 Apr 2026 10:39:12 -0700 Subject: [PATCH 4/6] .github/update-downstream-repos: Use `--paginate` + `--slurp` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GH API calls return 30 items by default¹ so use `--paginate` + `--slurp` to return all pages and get all items.² ¹ ² --- .../workflows/update-downstream-repos.yaml | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/.github/workflows/update-downstream-repos.yaml b/.github/workflows/update-downstream-repos.yaml index 3353ca8..e5acbfc 100644 --- a/.github/workflows/update-downstream-repos.yaml +++ b/.github/workflows/update-downstream-repos.yaml @@ -27,28 +27,26 @@ jobs: # ... 
# ] run: | - shared_matrix=$(gh api -X GET search/code \ + shared_matrix=$(gh api --paginate --slurp -X GET search/code \ -f q='org:nextstrain filename:.gitrepo "remote = https://github.com/nextstrain/shared"' \ | jq -c ' - .items - | map({ - "repo": "\(.repository.full_name)", - "path": "\(.path | split("/")[0:-1] | join("/"))" - }) - ') + [.[].items[] | { + "repo": .repository.full_name, + "path": (.path | split("/")[0:-1] | join("/")) + }] + ') # I was unable to get the 'OR' syntax to work with the search/code API, # so making a separate query for the old nextstrain/ingest repo name. # -Jover, 14 Apr 2026. - ingest_matrix=$(gh api -X GET search/code \ + ingest_matrix=$(gh api --paginate --slurp -X GET search/code \ -f q='org:nextstrain filename:.gitrepo "remote = https://github.com/nextstrain/ingest"' \ | jq -c ' - .items - | map({ - "repo": "\(.repository.full_name)", - "path": "\(.path | split("/")[0:-1] | join("/"))" - }) - ') + [.[].items[] | { + "repo": .repository.full_name, + "path": (.path | split("/")[0:-1] | join("/")) + }] + ') # Deduplicate by repo since each repo should only have a single copy # of the vendored repo. In cases where a repo has both, From c732fc006804233ae213754d15332281251f2330 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Thu, 16 Apr 2026 11:02:09 -0700 Subject: [PATCH 5/6] .github/update-downstream-repos: Remove `--force` flag Do not allow `git subrepo` to overwrite local changes in automatic updates. This will increase failures in the automatic updates but will flag merge conflicts with local changes in downstream repos for manual fixes. Note this requires us to fetch all git history for the downstream repos since `git subrepo pull` now needs to validate the parent commit. 
Prompted by feedback from @jameshadfield and @victorlin in review --- .github/workflows/update-downstream-repos.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/update-downstream-repos.yaml b/.github/workflows/update-downstream-repos.yaml index e5acbfc..d69a87d 100644 --- a/.github/workflows/update-downstream-repos.yaml +++ b/.github/workflows/update-downstream-repos.yaml @@ -76,6 +76,9 @@ jobs: uses: actions/checkout@v6 with: repository: ${{ matrix.repo }} + # Fetch all history since `git subrepo pull` needs to check history + # to validate the parent commit + fetch-depth: 0 token: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_REPO }} # Checkout git-subrepo _after_ the downstream repo to ensure that we # keep it in a path within the downstream repo that does not interfere @@ -93,7 +96,7 @@ jobs: git config user.email "${{ vars.GIT_USER_EMAIL_NEXTSTRAIN_BOT }}" git switch -c "$branch" - git subrepo pull "$VENDORED_PATH" --force + git subrepo pull "$VENDORED_PATH" - name: Create pull request env: GH_TOKEN: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_REPO }} @@ -101,8 +104,6 @@ jobs: body: | This PR was automatically created by https://github.com/nextstrain/shared/actions/runs/${{ github.run_id }} to update the vendored subrepo in ${{ env.VENDORED_PATH }}. - - Subrepo updates were made with the `--force` flag so it overwrites any local changes in the subrepo. run: | default_branch=$(git remote show origin | sed -n '/HEAD branch/s/.*: //p') changes=$(git rev-list --count "$default_branch".."$branch") From 23acdb4bc2e06a9454a6a803f5f3c6c12745f392 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Thu, 16 Apr 2026 15:19:28 -0700 Subject: [PATCH 6/6] .github/update-downstream-repos: additive updates only Try to fetch remote branch and add subrepo changes to prevent automatic updates from overwriting manual updates. This also allows us to skip unnecessary pushes to the PR if the remote branch is already up-to-date. 
--- .../workflows/update-downstream-repos.yaml | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/.github/workflows/update-downstream-repos.yaml b/.github/workflows/update-downstream-repos.yaml index d69a87d..a822a49 100644 --- a/.github/workflows/update-downstream-repos.yaml +++ b/.github/workflows/update-downstream-repos.yaml @@ -95,20 +95,38 @@ jobs: git config user.name "${{ vars.GIT_USER_NAME_NEXTSTRAIN_BOT }}" git config user.email "${{ vars.GIT_USER_EMAIL_NEXTSTRAIN_BOT }}" - git switch -c "$branch" + # Default branch as the default parent + parent=$(git remote show origin | sed -n '/HEAD branch/s/.*: //p') + # Fetch remote branch if it exists and switch to the branch + # Otherwise just create the branch locally + if git fetch origin "$branch" 2>/dev/null; then + parent="origin/$branch" + git switch "$branch" + else + git switch -c "$branch" + fi + git subrepo pull "$VENDORED_PATH" + + # Check for changes to determine if we need to create/update PRs + changes=$(git rev-list --count "$parent".."$branch") + echo "changes=$changes" | tee -a "$GITHUB_ENV" - name: Create pull request env: GH_TOKEN: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_REPO }} + # Define env to appease shellcheck + changes: ${{ env.changes }} title: '[bot] Update ${{ env.VENDORED_PATH }}' body: | This PR was automatically created by https://github.com/nextstrain/shared/actions/runs/${{ github.run_id }} to update the vendored subrepo in ${{ env.VENDORED_PATH }}. + + Subrepo changes may break workflows and require manual updates to + fix errors. It is safe to add manual updates directly to this PR + since they will not be overwritten by future automatic updates. 
run: | - default_branch=$(git remote show origin | sed -n '/HEAD branch/s/.*: //p') - changes=$(git rev-list --count "$default_branch".."$branch") if [[ "$changes" == "1" ]]; then - git push --force origin HEAD + git push origin HEAD pr_url=$(gh pr list --head "$branch" --json url | jq -r '.[0].url') if [[ "$pr_url" == "null" ]]; then