From e08a7b4456ffd5583e1fdaa3ebde4c301d0e0df2 Mon Sep 17 00:00:00 2001 From: Lukas Audzevicius Date: Mon, 12 Jan 2026 11:48:55 +0000 Subject: [PATCH 1/4] (CAT-2526) Implement retry system Lately, we have been noticing an increase in failed agent installation steps during our module acceptance testing. These failures are often transient and start working after a few retries. This PR adds a retry mechanism to avoid re-running the entire job, which can be time- and resource-consuming. --- .github/workflows/module_acceptance.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/module_acceptance.yml b/.github/workflows/module_acceptance.yml index 1bfa5be..d939222 100644 --- a/.github/workflows/module_acceptance.yml +++ b/.github/workflows/module_acceptance.yml @@ -145,13 +145,18 @@ jobs: sed -e 's/password: .*/password: "[redacted]"/' < $FILE || true - name: "Install Puppet agent" - run: | - if [[ "${{ matrix.collection.version }}" ]] ; then - export PUPPET_VERSION=${{ matrix.collection.version }} - bundle exec rake 'litmus:install_agent[${{ matrix.collection.collection }}]' - else - bundle exec rake 'litmus:install_agent[${{ matrix.collection }}]' - fi + uses: nick-fields/retry@v3 + with: + timeout_minutes: 10 + max_attempts: 3 + retry_wait_seconds: 30 + command: | + if [[ "${{ matrix.collection.version }}" ]] ; then + export PUPPET_VERSION=${{ matrix.collection.version }} + bundle exec rake 'litmus:install_agent[${{ matrix.collection.collection }}]' + else + bundle exec rake 'litmus:install_agent[${{ matrix.collection }}]' + fi - name: "Install module" run: | From e14b4015ab5e5a980342fccbe60ec4ac901d1e90 Mon Sep 17 00:00:00 2001 From: Lukas Audzevicius Date: Mon, 12 Jan 2026 11:57:51 +0000 Subject: [PATCH 2/4] Remove bloated titles --- .github/workflows/module_acceptance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/module_acceptance.yml
b/.github/workflows/module_acceptance.yml index d939222..a29e2e4 100644 --- a/.github/workflows/module_acceptance.yml +++ b/.github/workflows/module_acceptance.yml @@ -69,7 +69,7 @@ jobs: bundle exec matrix_from_metadata_v3 ${{ inputs.flags }} acceptance: - name: "Acceptance tests (${{matrix.platforms.label}}, ${{matrix.collection.collection || matrix.collection}})" + name: "${{matrix.platforms.label}}, ${{matrix.collection.collection || matrix.collection}}" needs: "setup_matrix" runs-on: ${{ inputs.runs_on || matrix.platforms.runner }} timeout-minutes: 180 From 80a0bb13874e176c61267236e67432523e070d32 Mon Sep 17 00:00:00 2001 From: Lukas Audzevicius Date: Mon, 12 Jan 2026 14:39:26 +0000 Subject: [PATCH 3/4] test retry mechanism with different parameters --- .github/workflows/module_acceptance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/module_acceptance.yml b/.github/workflows/module_acceptance.yml index a29e2e4..5db62eb 100644 --- a/.github/workflows/module_acceptance.yml +++ b/.github/workflows/module_acceptance.yml @@ -148,7 +148,7 @@ jobs: uses: nick-fields/retry@v3 with: timeout_minutes: 10 - max_attempts: 3 + max_attempts: 10 retry_wait_seconds: 30 command: | if [[ "${{ matrix.collection.version }}" ]] ; then From 9837fae1a7e37cc5c91e4fd0a47c0d159ae9dd27 Mon Sep 17 00:00:00 2001 From: Lukas Audzevicius Date: Tue, 13 Jan 2026 14:08:58 +0000 Subject: [PATCH 4/4] apply retry to DNS fix and agent installation --- .github/workflows/module_acceptance.yml | 36 ++++++++++++------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/.github/workflows/module_acceptance.yml b/.github/workflows/module_acceptance.yml index 5db62eb..faf454a 100644 --- a/.github/workflows/module_acceptance.yml +++ b/.github/workflows/module_acceptance.yml @@ -89,24 +89,6 @@ jobs: with: service-key: ${{ env.TWINGATE_PUBLIC_REPO_KEY }} - - name: Fix DNS - run: | - echo "=== Remove Azure DNS from eth0 interface ===" - sudo 
resolvectl dns eth0 "" - - echo "=== Configure Twingate DNS properly ===" - sudo resolvectl dns sdwan0 100.95.0.251 100.95.0.252 - sudo resolvectl domain sdwan0 delivery.puppetlabs.net - - echo "=== Flush DNS cache ===" - sudo resolvectl flush-caches - - echo "=== Check new configuration ===" - resolvectl status - - echo "=== Test DNS resolution ===" - nslookup artifactory.delivery.puppetlabs.net - - name: "Checkout" uses: "actions/checkout@v4" @@ -144,13 +126,29 @@ jobs: FILE='spec/fixtures/litmus_inventory.yaml' sed -e 's/password: .*/password: "[redacted]"/' < $FILE || true - - name: "Install Puppet agent" + - name: "Fix DNS and Install Puppet agent" uses: nick-fields/retry@v3 with: timeout_minutes: 10 max_attempts: 10 retry_wait_seconds: 30 command: | + echo "=== Remove Azure DNS from eth0 interface ===" + sudo resolvectl dns eth0 "" + + echo "=== Configure Twingate DNS properly ===" + sudo resolvectl dns sdwan0 100.95.0.251 100.95.0.252 + sudo resolvectl domain sdwan0 delivery.puppetlabs.net + + echo "=== Flush DNS cache ===" + sudo resolvectl flush-caches + + echo "=== Check new configuration ===" + resolvectl status + + echo "=== Test DNS resolution ===" + nslookup artifactory.delivery.puppetlabs.net + if [[ "${{ matrix.collection.version }}" ]] ; then export PUPPET_VERSION=${{ matrix.collection.version }} bundle exec rake 'litmus:install_agent[${{ matrix.collection.collection }}]'