Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
43ec9da
Bump tar in the npm_and_yarn group across 1 directory
dependabot[bot] Mar 11, 2026
8e9bf55
Merge pull request #54 from dotcomrow/prod
mephmanx Mar 13, 2026
df3a47f
adding auto retry on terraform failure
mephmanx Mar 13, 2026
f93a5b3
fixing js headers to allow caching
mephmanx Mar 13, 2026
2d4ddaf
limit terraform auto-retry detection to deploy branches
mephmanx Mar 13, 2026
9e21686
limit terraform auto-retry detection to deploy branches
mephmanx Mar 13, 2026
8b7f13f
use grep in terraform auto-retry log detection
mephmanx Mar 13, 2026
dae3f1b
use grep in terraform auto-retry log detection
mephmanx Mar 13, 2026
304d380
add scheduled terraform deploy auto-retry polling
mephmanx Mar 13, 2026
6ca4ad2
add scheduled terraform deploy auto-retry polling
mephmanx Mar 13, 2026
a39c08d
fix jq parsing in scheduled terraform auto-retry
mephmanx Mar 13, 2026
5f93fa2
fix jq parsing in scheduled terraform auto-retry
mephmanx Mar 13, 2026
305e70f
make terraform auto-retry self-monitor until success
mephmanx Mar 13, 2026
72f5259
make terraform auto-retry self-monitor until success
mephmanx Mar 13, 2026
3c26751
fix auto-retry workflow matrix if compile error
mephmanx Mar 13, 2026
3a1ddec
fix auto-retry workflow matrix if compile error
mephmanx Mar 13, 2026
d45a255
adding runtime config injection
mephmanx Mar 14, 2026
034bb13
Merge pull request #55 from dotcomrow/dev
mephmanx Mar 14, 2026
2356b98
Merge pull request #56 from dotcomrow/prod
mephmanx Mar 14, 2026
711f166
adding support for MFE's
mephmanx Mar 15, 2026
ac6d6f4
Merge pull request #49 from dotcomrow/dependabot/npm_and_yarn/npm_and…
mephmanx Mar 15, 2026
12c4b57
Merge pull request #58 from dotcomrow/prod
mephmanx Mar 15, 2026
6e0d138
Merge pull request #59 from dotcomrow/dev
mephmanx Mar 15, 2026
42d1e34
Bump the npm_and_yarn group across 1 directory with 2 updates
dependabot[bot] Mar 17, 2026
4af9eb1
Merge pull request #60 from dotcomrow/dependabot/npm_and_yarn/npm_and…
mephmanx Mar 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 119 additions & 41 deletions .github/workflows/terraform-deploy-auto-retry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,73 +4,151 @@ on:
workflow_run:
workflows:
- Terraform Deploy
branches:
- dev
- prod
types:
- completed
schedule:
- cron: "*/5 * * * *"
workflow_dispatch:

permissions:
actions: write
contents: read

jobs:
rerun-on-tfc-discovery-timeout:
if: ${{ github.event.workflow_run.conclusion == 'failure' }}
retry-until-success:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
branch:
- dev
- prod
concurrency:
group: terraform-deploy-auto-retry-${{ matrix.branch }}
cancel-in-progress: false

steps:
- name: Evaluate Terraform Deploy failure and optionally rerun
- name: Retry failed Terraform deploys until successful
env:
GH_TOKEN: ${{ github.token }}
GH_TOKEN: ${{ secrets.ACTIONS_BOT_TOKEN || github.token }}
OWNER: ${{ github.repository_owner }}
REPO: ${{ github.event.repository.name }}
RUN_ID: ${{ github.event.workflow_run.id }}
HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
EVENT_NAME: ${{ github.event_name }}
BRANCH: ${{ matrix.branch }}
WORKFLOW_RUN_BRANCH: ${{ github.event.workflow_run.head_branch }}
WORKFLOW_RUN_CONCLUSION: ${{ github.event.workflow_run.conclusion }}
TARGET_WORKFLOW_NAME: "Terraform Deploy"
TARGET_WORKFLOW_FILE: "terraform-deploy.yml"
run: |
set -euo pipefail

echo "Terraform Deploy run id: ${RUN_ID}"
echo "Failed run branch: ${HEAD_BRANCH}"
should_monitor_branch() {
if [ "${EVENT_NAME}" != "workflow_run" ]; then
return 0
fi

workdir="$(mktemp -d)"
trap 'rm -rf "${workdir}"' EXIT
logs_zip="${workdir}/logs.zip"
logs_dir="${workdir}/logs"
if [ "${WORKFLOW_RUN_BRANCH}" != "${BRANCH}" ]; then
echo "workflow_run was for '${WORKFLOW_RUN_BRANCH}', this job is '${BRANCH}'. Skipping."
return 1
fi

downloaded="false"
for i in 1 2 3 4 5 6; do
if gh api "/repos/${OWNER}/${REPO}/actions/runs/${RUN_ID}/logs" > "${logs_zip}"; then
downloaded="true"
break
if [ "${WORKFLOW_RUN_CONCLUSION}" != "failure" ]; then
echo "workflow_run conclusion for '${BRANCH}' was '${WORKFLOW_RUN_CONCLUSION}'. Nothing to retry."
return 1
fi
echo "Run logs not ready yet, retrying in 10s (${i}/6)."
sleep 10
done

if [ "${downloaded}" != "true" ]; then
echo "Could not download logs for failed run; skipping automatic rerun."
exit 0
fi
return 0
}

unzip -q "${logs_zip}" -d "${logs_dir}"
# Print a JSON array holding the most recent run of the target workflow on
# the monitored branch (at most one element).
#
# Globals (read): OWNER, REPO, TARGET_WORKFLOW_NAME, BRANCH
# Outputs: the JSON array from `gh run list` on stdout; exactly "[]" when the
#          gh call fails, plus a warning on stderr.
# Returns: 0 in both cases, so callers running under `set -e` keep polling.
latest_run_json() {
  local runs_json

  # Capture first instead of `gh ... || echo "[]"`: if gh fails after writing
  # partial output, the fallback must replace that output rather than be
  # appended to it, otherwise callers hand invalid JSON to jq.
  if runs_json="$(gh run list -R "${OWNER}/${REPO}" \
    --workflow "${TARGET_WORKFLOW_NAME}" \
    --branch "${BRANCH}" \
    --limit 1 \
    --json databaseId,status,conclusion,createdAt,url,event)"; then
    printf '%s\n' "${runs_json}"
  else
    # Surface the failure so a persistent error is not mistaken for
    # "no runs found" in the job log.
    echo "warning: 'gh run list' failed for branch '${BRANCH}'; treating as no runs." >&2
    echo "[]"
  fi
}

if rg -n -F "Failed to request discovery document" "${logs_dir}" >/dev/null \
|| rg -n -F "https://app.terraform.io/.well-known/terraform.json" "${logs_dir}" >/dev/null \
|| rg -n -F "context deadline exceeded (Client.Timeout exceeded while awaiting headers)" "${logs_dir}" >/dev/null; then
if [ -z "${HEAD_BRANCH}" ]; then
echo "Head branch is empty; cannot dispatch Terraform Deploy retry."
exit 0
fi
# Trigger a workflow_dispatch of the target workflow file on the monitored
# branch.
#
# Globals (read): OWNER, REPO, BRANCH, TARGET_WORKFLOW_NAME, TARGET_WORKFLOW_FILE
# Outputs: one progress line on stdout; the API response body is discarded.
# Returns: the exit status of the `gh api` call.
dispatch_branch() {
  local dispatch_endpoint="/repos/${OWNER}/${REPO}/actions/workflows/${TARGET_WORKFLOW_FILE}/dispatches"

  echo "Dispatching ${TARGET_WORKFLOW_NAME} on branch '${BRANCH}'."
  gh api --method POST "${dispatch_endpoint}" \
    -f ref="${BRANCH}" \
    >/dev/null
}

if [ "${HEAD_BRANCH}" != "dev" ] && [ "${HEAD_BRANCH}" != "prod" ]; then
echo "Head branch '${HEAD_BRANCH}' is not a deploy branch; skipping retry dispatch."
exit 0
fi
# Poll the latest target-workflow run on ${BRANCH} and re-dispatch the
# workflow whenever that run ended in a retryable conclusion, looping until
# the latest run reports "success".
#
# Calls:   latest_run_json (one-element JSON array, or "[]"), dispatch_branch
# Returns: 0 once the latest run's conclusion is "success". There is no other
#          exit from the outer loop and no overall attempt/time limit here.
monitor_branch_until_success() {
local latest_json
local run_id
local latest_status
local run_conclusion
# Id of the failed run we most recently dispatched a retry for.
local previous_failed_run_id=""
# Number of 15s polls in which the latest run was still the old failed one.
local wait_count=0

while true; do
latest_json="$(latest_run_json)"
# Empty output or an empty array: nothing to inspect yet, poll again.
if [ -z "${latest_json}" ] || [ "$(echo "${latest_json}" | jq 'length')" -eq 0 ]; then
echo "No ${TARGET_WORKFLOW_NAME} runs found for '${BRANCH}'. Waiting 20s."
sleep 20
continue
fi

run_id="$(echo "${latest_json}" | jq -r '.[0].databaseId')"
latest_status="$(echo "${latest_json}" | jq -r '.[0].status')"
# A null conclusion is mapped to the empty string.
run_conclusion="$(echo "${latest_json}" | jq -r '.[0].conclusion // ""')"

echo "Branch '${BRANCH}' latest run=${run_id} status=${latest_status} conclusion=${run_conclusion}"

# Run has not finished; check again later. Any other status falls through
# to the conclusion checks below.
if [ "${latest_status}" = "in_progress" ] || [ "${latest_status}" = "queued" ]; then
sleep 20
continue
fi

if [ "${run_conclusion}" = "success" ]; then
echo "Branch '${BRANCH}' latest run succeeded. Retry loop complete."
return 0
fi

# Retryable conclusions: dispatch a fresh run, then wait for it to appear.
if [ "${run_conclusion}" = "failure" ] || [ "${run_conclusion}" = "cancelled" ] || [ "${run_conclusion}" = "timed_out" ] || [ "${run_conclusion}" = "startup_failure" ]; then
# Already dispatched a retry for this exact run; do not dispatch twice.
if [ "${previous_failed_run_id}" = "${run_id}" ]; then
sleep 20
continue
fi

previous_failed_run_id="${run_id}"
dispatch_branch
wait_count=0

# Wait until the latest run id differs from the failed one, i.e. a newer
# run is visible in the run list.
while true; do
sleep 15
latest_json="$(latest_run_json)"
# An empty result does not count toward wait_count.
if [ -z "${latest_json}" ] || [ "$(echo "${latest_json}" | jq 'length')" -eq 0 ]; then
continue
fi

run_id="$(echo "${latest_json}" | jq -r '.[0].databaseId')"
if [ "${run_id}" != "${previous_failed_run_id}" ]; then
echo "Detected new ${TARGET_WORKFLOW_NAME} run ${run_id} on '${BRANCH}'."
break
fi

wait_count=$((wait_count + 1))
# 40 polls at 15s each (about 10 minutes) without a new run: dispatch again.
if [ "${wait_count}" -ge 40 ]; then
echo "Dispatched run not visible yet on '${BRANCH}'. Re-dispatching."
dispatch_branch
wait_count=0
fi
done
continue
fi

# NOTE(review): the next two lines read like leftovers from the previous
# log-inspection implementation (nothing in this function detects a discovery
# timeout) — confirm whether they belong here.
echo "Detected Terraform Cloud discovery timeout. Waiting 120s before retry dispatch."
sleep 120
# Any other conclusion (including empty) is not retried; keep polling.
echo "Branch '${BRANCH}' has non-retryable conclusion '${run_conclusion}'. Waiting 20s."
sleep 20
done
}

echo "Dispatching Terraform Deploy on branch '${HEAD_BRANCH}'."
gh api --method POST "/repos/${OWNER}/${REPO}/actions/workflows/terraform-deploy.yml/dispatches" -f ref="${HEAD_BRANCH}"
if ! should_monitor_branch; then
exit 0
fi

echo "Failure reason did not match Terraform Cloud discovery timeout; no rerun requested."
echo "Starting retry monitor for branch '${BRANCH}' (event=${EVENT_NAME})."
monitor_branch_until_success
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ renders slot placeholders (`{{ slot:main }}`, etc.), and serves CMS HTML directl

## Browser Cache Versioning

- HTML responses default to `Cache-Control: public, max-age=31536000, immutable`.
- Static assets default to `Cache-Control: public, max-age=31536000, immutable`.
- HTML responses default to `Cache-Control: public, max-age=0, must-revalidate` (override with `app_html_cache_control`).
- Next build assets under `/_next/static/*` use `Cache-Control: public, max-age=31536000, immutable` via [`public/_headers`](public/_headers).
- Worker-handled static routes default to `Cache-Control: public, max-age=31536000, immutable` (override with `app_static_cache_control`).
- API routes default to `Cache-Control: no-store, no-cache, must-revalidate`.
- The page includes a content hash (`data-content-hash`) and checks `/api/cache/version`.
- If the hash changes after a Directus refresh/deploy, the client reloads with `?cmsv=<hash>`.
Expand Down
Loading
Loading