From 39f67450569c3153090368fe1d4db3075fedd424 Mon Sep 17 00:00:00 2001
From: Eason WaveKat
Date: Tue, 19 May 2026 07:43:02 +1200
Subject: [PATCH 1/2] Add docker Github Runner setup

---
 scripts/docker/Dockerfile               |  68 ++++++++++
 scripts/docker/entrypoint.sh            |  56 ++++++++
 scripts/setup-gha-runners-docker.sh     | 168 ++++++++++++++++++++++++
 scripts/uninstall-gha-runners-docker.sh |  79 +++++++++++
 vendor/wavekat-brand                    |   2 +-
 5 files changed, 372 insertions(+), 1 deletion(-)
 create mode 100644 scripts/docker/Dockerfile
 create mode 100755 scripts/docker/entrypoint.sh
 create mode 100755 scripts/setup-gha-runners-docker.sh
 create mode 100755 scripts/uninstall-gha-runners-docker.sh

diff --git a/scripts/docker/Dockerfile b/scripts/docker/Dockerfile
new file mode 100644
index 0000000..1782201
--- /dev/null
+++ b/scripts/docker/Dockerfile
@@ -0,0 +1,68 @@
+# syntax=docker/dockerfile:1.7
+
+# Self-hosted GitHub Actions runner image for the `wavekat` org.
+#
+# Build (uses the RUNNER_VERSION pinned by the ARG default below):
+#   docker build -t wavekat/gha-runner:latest scripts/docker
+#
+# Or override the pinned version:
+#   docker build --build-arg RUNNER_VERSION=2.334.0 \
+#       -t wavekat/gha-runner:2.334.0 scripts/docker
+
+FROM ubuntu:24.04
+
+ARG RUNNER_VERSION=2.334.0
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Base OS + common build deps used across wavekat workflows.
+# Per-job `apt-get install` still works inside the container (no host
+# lock contention) so workflows can add packages as needed.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates curl wget git jq sudo gnupg unzip xz-utils zstd \
+    build-essential pkg-config libssl-dev libicu-dev \
+    librsvg2-bin libasound2-dev \
+    python3 python3-pip \
+    tzdata locales \
+    && locale-gen en_US.UTF-8 \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8
+
+# Node 22 (matches CLAUDE.md project requirement) + corepack/pnpm.
+RUN curl -fsSL -o /tmp/nodesource_setup.sh https://deb.nodesource.com/setup_22.x \
+    && bash /tmp/nodesource_setup.sh && apt-get install -y --no-install-recommends nodejs \
+    && corepack enable \
+    && rm -rf /tmp/nodesource_setup.sh /var/lib/apt/lists/*
+
+# Non-root runner user. Passwordless sudo so workflows can still do
+# `sudo apt-get install ...` for one-off deps — safe inside the
+# container's isolated rootfs.
+RUN useradd -m -s /bin/bash runner \
+    && echo 'runner ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/runner \
+    && chmod 0440 /etc/sudoers.d/runner
+
+# Stage the actions/runner tarball into /opt/runner-template. The
+# entrypoint copies this into the runtime work dir on first start so
+# image rebuilds don't clobber persisted registration state.
+RUN set -eux; \
+    ARCH="$(dpkg --print-architecture)"; \
+    case "$ARCH" in \
+      amd64) RARCH=x64 ;; \
+      arm64) RARCH=arm64 ;; \
+      *) echo "unsupported arch: $ARCH" >&2; exit 1 ;; \
+    esac; \
+    mkdir -p /opt/runner-template; \
+    curl -fsSL -o /tmp/runner.tar.gz \
+      "https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-${RARCH}-${RUNNER_VERSION}.tar.gz"; \
+    tar -xzf /tmp/runner.tar.gz -C /opt/runner-template; \
+    rm /tmp/runner.tar.gz; \
+    /opt/runner-template/bin/installdependencies.sh; \
+    chown -R runner:runner /opt/runner-template
+
+COPY --chown=runner:runner entrypoint.sh /usr/local/bin/entrypoint.sh
+RUN chmod +x /usr/local/bin/entrypoint.sh
+
+USER runner
+WORKDIR /home/runner/runner
+
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
diff --git a/scripts/docker/entrypoint.sh b/scripts/docker/entrypoint.sh
new file mode 100755
index 0000000..85948f1
--- /dev/null
+++ b/scripts/docker/entrypoint.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+#
+# Container entrypoint for a self-hosted GitHub Actions runner.
+#
+# First start: copies the staged runner binaries from /opt/runner-template
+# into the persistent work dir, registers with the org using
+# RUNNER_TOKEN, then execs `./run.sh`.
+# +# Subsequent starts: skips config, reconnects with the persisted +# `.runner` / `.credentials` files. RUNNER_TOKEN is only needed for the +# first start (a registration token is valid 1h and burned on use). +# +# Required env: +# RUNNER_ORG — GitHub org (e.g. wavekat) +# RUNNER_NAME — unique name for this runner +# RUNNER_LABELS — comma-separated labels +# RUNNER_TOKEN — registration token (first start only) +# +# Optional: +# RUNNER_GROUP — runner group (default: Default) + +set -euo pipefail + +: "${RUNNER_ORG:?RUNNER_ORG is required}" +: "${RUNNER_NAME:?RUNNER_NAME is required}" +: "${RUNNER_LABELS:?RUNNER_LABELS is required}" + +RUNNER_GROUP="${RUNNER_GROUP:-Default}" +WORK_DIR="/home/runner/runner" + +if [[ ! -f "${WORK_DIR}/config.sh" ]]; then + cp -a /opt/runner-template/. "${WORK_DIR}/" +fi + +cd "${WORK_DIR}" + +if [[ ! -f .runner ]]; then + if [[ -z "${RUNNER_TOKEN:-}" ]]; then + echo "first start but RUNNER_TOKEN is empty — cannot register" >&2 + exit 1 + fi + ./config.sh \ + --unattended \ + --replace \ + --url "https://github.com/${RUNNER_ORG}" \ + --token "${RUNNER_TOKEN}" \ + --name "${RUNNER_NAME}" \ + --runnergroup "${RUNNER_GROUP}" \ + --labels "${RUNNER_LABELS}" \ + --work _work +fi + +# Forward SIGTERM cleanly so the runner finishes its current job before +# exiting. Do NOT call `./config.sh remove` here — we want the +# registration to persist across container restarts. +exec ./run.sh diff --git a/scripts/setup-gha-runners-docker.sh b/scripts/setup-gha-runners-docker.sh new file mode 100755 index 0000000..72093b1 --- /dev/null +++ b/scripts/setup-gha-runners-docker.sh @@ -0,0 +1,168 @@ +#!/usr/bin/env bash +# +# Install N self-hosted GitHub Actions runners on a single Linux host +# as Docker containers, one runner per container, managed by systemd. +# +# Each container has its own rootfs, so apt locks / port conflicts / tmp +# clashes between parallel jobs are no longer possible. 
Registration +# state is persisted in a per-container Docker volume so host reboots +# don't require re-registration. +# +# Usage (run on the target machine, as a user with sudo): +# +# # Easiest: let the script fetch a registration token via gh CLI. +# # (`gh auth login` once with an account that has wavekat org admin) +# ./setup-gha-runners-docker.sh +# +# # Or pass a token explicitly (valid 1h, can register multiple runners): +# RUNNER_TOKEN=AAAA... ./setup-gha-runners-docker.sh +# +# # Override defaults: +# RUNNER_COUNT=6 RUNNER_PREFIX=aoc-m3l RUNNER_LABELS=aoc-m3l,gpu \ +# ./setup-gha-runners-docker.sh +# +# Re-running is safe: existing containers/services are torn down and +# re-registered. The registration token is only consumed on the first +# start of a runner — subsequent restarts use the cached credentials. + +set -euo pipefail + +ORG="${RUNNER_ORG:-wavekat}" +COUNT="${RUNNER_COUNT:-4}" +PREFIX="${RUNNER_PREFIX:-$(hostname -s)}" +RUNNER_LABELS="${RUNNER_LABELS:-wavekat-ci,${PREFIX}}" +IMAGE="${RUNNER_IMAGE:-wavekat/gha-runner:latest}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DOCKER_CONTEXT="${SCRIPT_DIR}/docker" + +log() { printf '\033[1;36m==>\033[0m %s\n' "$*"; } +warn() { printf '\033[1;33m!!\033[0m %s\n' "$*" >&2; } +die() { printf '\033[1;31mxx\033[0m %s\n' "$*" >&2; exit 1; } + +[[ "$(uname -s)" == "Linux" ]] || die "this script targets Linux (got $(uname -s))" +[[ -d "${DOCKER_CONTEXT}" ]] || die "missing docker context at ${DOCKER_CONTEXT}" + +# 1. Ensure Docker is installed. +if ! command -v docker >/dev/null 2>&1; then + log "installing docker (docker.io from Ubuntu archive)" + sudo apt-get update -y + sudo apt-get install -y --no-install-recommends docker.io + sudo systemctl enable --now docker +fi + +# 2. Build the runner image. +log "building runner image ${IMAGE}" +sudo docker build -t "${IMAGE}" "${DOCKER_CONTEXT}" + +# 3. 
Fetch a registration token (one token can register multiple runners +# within its 1h validity window). +get_token() { + if [[ -n "${RUNNER_TOKEN:-}" ]]; then + printf '%s' "${RUNNER_TOKEN}" + return + fi + if ! command -v gh >/dev/null 2>&1; then + cat >&2 <<'EOF' + +No RUNNER_TOKEN set, and `gh` CLI is not installed. + +Install gh on Ubuntu and re-run, or fetch a token elsewhere: + + sudo apt-get install -y gh + gh auth login # use an account with wavekat org admin + + # or, from any machine with gh authed as a wavekat admin: + gh api -X POST /orgs/wavekat/actions/runners/registration-token --jq .token + RUNNER_TOKEN= ./setup-gha-runners-docker.sh +EOF + exit 1 + fi + gh api -X POST "/orgs/${ORG}/actions/runners/registration-token" --jq .token \ + || die "failed to fetch registration token (is gh authed as a wavekat admin?)" +} + +TOKEN="$(get_token)" +[[ -n "${TOKEN}" ]] || die "got empty registration token" + +# 4. Install a systemd template unit. One container per instance, each +# with its own named volume so the registration persists across +# restarts and host reboots. +UNIT_PATH="/etc/systemd/system/gha-runner@.service" +log "writing ${UNIT_PATH}" +sudo tee "${UNIT_PATH}" >/dev/null </dev/null 2>&1 || true + sudo docker volume rm "gha-runner-${i}" >/dev/null 2>&1 || true + + sudo tee "${ENV_FILE}" >/dev/null <\033[0m %s\n' "$*"; } +warn() { printf '\033[1;33m!!\033[0m %s\n' "$*" >&2; } +die() { printf '\033[1;31mxx\033[0m %s\n' "$*" >&2; exit 1; } + +get_token() { + if [[ -n "${RUNNER_TOKEN:-}" ]]; then + printf '%s' "${RUNNER_TOKEN}" + return + fi + if ! command -v gh >/dev/null 2>&1; then + cat >&2 <<'EOF' + +No RUNNER_TOKEN set, and `gh` CLI is not installed. 
+
+Install gh on Ubuntu and re-run, or fetch a remove-token elsewhere:
+
+  sudo apt-get install -y gh
+  gh auth login
+
+  # or, from any machine with gh authed as a wavekat admin:
+  gh api -X POST /orgs/wavekat/actions/runners/remove-token --jq .token
+  RUNNER_TOKEN= ./uninstall-gha-runners-docker.sh
+EOF
+    exit 1
+  fi
+  gh api -X POST "/orgs/${ORG}/actions/runners/remove-token" --jq .token
+}
+
+TOKEN="$(get_token)"
+[[ -n "${TOKEN}" ]] || die "got empty remove token"
+
+for i in $(seq 1 "${COUNT}"); do
+  NAME="${PREFIX}-${i}"
+  log "removing runner ${NAME}"
+
+  # De-register inside the container as `runner` (config.sh refuses to run
+  # as root) before tearing it down. Best effort: if this fails the runner
+  # is orphaned and must be removed from the GitHub UI manually.
+  if sudo docker ps -a --format '{{.Names}}' | grep -q "^gha-runner-${i}$"; then
+    sudo docker exec -u runner -w /home/runner/runner "gha-runner-${i}" \
+      ./config.sh remove --token "${TOKEN}" \
+      || warn "in-container de-register failed for ${NAME} (will be removed locally regardless)"
+  fi
+
+  sudo systemctl disable --now "gha-runner@${i}.service" 2>/dev/null || true
+  sudo docker rm -f "gha-runner-${i}" >/dev/null 2>&1 || true
+  sudo docker volume rm "gha-runner-${i}" >/dev/null 2>&1 || true
+  sudo rm -f "/etc/gha-runner/${i}.env"
+done
+
+# Remove the template unit only if no instances remain enabled.
+if !
systemctl list-units --all 'gha-runner@*.service' --no-legend | grep -q .; then + log "removing systemd template unit" + sudo rm -f /etc/systemd/system/gha-runner@.service + sudo rmdir /etc/gha-runner 2>/dev/null || true + sudo systemctl daemon-reload +fi + +log "done" diff --git a/vendor/wavekat-brand b/vendor/wavekat-brand index 079128b..890cda6 160000 --- a/vendor/wavekat-brand +++ b/vendor/wavekat-brand @@ -1 +1 @@ -Subproject commit 079128bc47a791f8d0c9df05f8d20227a27b371a +Subproject commit 890cda6ff19605ddd86c2ee8a74d7569414fda75 From bb396f5986d0499b2f71aca0d81ab80d5dc2b490 Mon Sep 17 00:00:00 2001 From: Eason WaveKat Date: Tue, 19 May 2026 08:00:03 +1200 Subject: [PATCH 2/2] fix(gha-runners): chown named-volume in entrypoint; keep token in env MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fresh Docker named volumes mount as root:root, so the `runner` user couldn't write to /home/runner/runner on first start and config.sh failed with permission denied. Entrypoint now starts as root, chowns the volume, and re-execs itself as `runner` via sudo. Also stop scrubbing RUNNER_TOKEN from env files after start — the token is harmless after 1h and scrubbing made systemd's auto-restart unable to recover when config.sh failed on the first attempt. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/docker/Dockerfile | 11 ++++++++--- scripts/docker/entrypoint.sh | 10 ++++++++++ scripts/setup-gha-runners-docker.sh | 14 +++++--------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/scripts/docker/Dockerfile b/scripts/docker/Dockerfile index 1782201..ee10dcd 100644 --- a/scripts/docker/Dockerfile +++ b/scripts/docker/Dockerfile @@ -59,10 +59,15 @@ RUN set -eux; \ /opt/runner-template/bin/installdependencies.sh; \ chown -R runner:runner /opt/runner-template -COPY --chown=runner:runner entrypoint.sh /usr/local/bin/entrypoint.sh -RUN chmod +x /usr/local/bin/entrypoint.sh +COPY entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod +x /usr/local/bin/entrypoint.sh \ + && mkdir -p /home/runner/runner \ + && chown runner:runner /home/runner /home/runner/runner -USER runner +# Entrypoint starts as root so it can chown the named-volume mountpoint +# (Docker creates fresh named volumes as root:root regardless of the +# image's directory ownership). It then drops to the `runner` user +# before exec'ing the GitHub Actions runner. WORKDIR /home/runner/runner ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/scripts/docker/entrypoint.sh b/scripts/docker/entrypoint.sh index 85948f1..f83a1aa 100755 --- a/scripts/docker/entrypoint.sh +++ b/scripts/docker/entrypoint.sh @@ -28,6 +28,16 @@ set -euo pipefail RUNNER_GROUP="${RUNNER_GROUP:-Default}" WORK_DIR="/home/runner/runner" +# First-boot fix: Docker creates fresh named volumes owned by root:root, +# so the `runner` user can't write into the mountpoint until we chown +# it. Do that as root, then re-exec ourselves as `runner`. +if [[ "$(id -u)" -eq 0 ]]; then + chown -R runner:runner "${WORK_DIR}" + exec sudo \ + --preserve-env=RUNNER_ORG,RUNNER_NAME,RUNNER_LABELS,RUNNER_TOKEN,RUNNER_GROUP \ + -u runner -H /usr/local/bin/entrypoint.sh "$@" +fi + if [[ ! -f "${WORK_DIR}/config.sh" ]]; then cp -a /opt/runner-template/. 
"${WORK_DIR}/" fi diff --git a/scripts/setup-gha-runners-docker.sh b/scripts/setup-gha-runners-docker.sh index 72093b1..d10affe 100755 --- a/scripts/setup-gha-runners-docker.sh +++ b/scripts/setup-gha-runners-docker.sh @@ -152,15 +152,11 @@ for i in $(seq 1 "${COUNT}"); do sudo systemctl enable --now "gha-runner@${i}.service" done -# 7. After the first successful start, the registration token has been -# burned. Scrub it from the env files so a restart can't accidentally -# try to re-register with a dead token (the entrypoint skips config -# when .runner already exists in the volume). -log "scrubbing one-shot registration tokens from env files" -for i in $(seq 1 "${COUNT}"); do - ENV_FILE="/etc/gha-runner/${i}.env" - sudo sed -i 's/^RUNNER_TOKEN=.*/RUNNER_TOKEN=/' "${ENV_FILE}" -done +# NOTE: we intentionally do NOT scrub RUNNER_TOKEN from the env files +# after start. The token is single-use and expires after 1h, so leaving +# it in a root-owned 0600 file is harmless. Scrubbing it would make +# systemd's auto-restart unable to recover if config.sh failed on the +# first attempt (no token in the env file → no way to register). log "done — ${COUNT} runner(s) registered to ${ORG}" log "check status: systemctl list-units 'gha-runner@*.service'"