Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions scripts/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# syntax=docker/dockerfile:1.7

# Self-hosted GitHub Actions runner image for the `wavekat` org.
#
# Build (uses the default RUNNER_VERSION declared in the ARG below):
# docker build -t wavekat/gha-runner:latest scripts/docker
#
# Or pin a version:
# docker build --build-arg RUNNER_VERSION=2.334.0 \
# -t wavekat/gha-runner:2.334.0 scripts/docker

FROM ubuntu:24.04

# Runner release to stage; override with `--build-arg RUNNER_VERSION=<x.y.z>`.
ARG RUNNER_VERSION=2.334.0
# Keeps apt from prompting for input during the build.
ARG DEBIAN_FRONTEND=noninteractive

# Base OS + common build deps used across wavekat workflows, plus the
# en_US.UTF-8 locale. Per-job `apt-get install` still works inside the
# container (no host lock contention), so workflows can add packages as
# needed. The apt index is removed at the end of the layer.
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates curl wget git jq sudo gnupg unzip xz-utils zstd \
        build-essential pkg-config libssl-dev libicu-dev \
        librsvg2-bin libasound2-dev \
        python3 python3-pip \
        tzdata locales; \
    locale-gen en_US.UTF-8; \
    rm -rf /var/lib/apt/lists/*

ENV LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8

# Node 22 (matches CLAUDE.md project requirement) + corepack/pnpm.
#
# The NodeSource setup script is downloaded to a file and then executed,
# instead of being piped into bash: RUN uses plain `sh -c` (no pipefail),
# so with `curl ... | bash -` a failed download would go unnoticed and
# the build would carry on to `apt-get install nodejs` without the
# NodeSource repository configured.
RUN set -eux; \
    curl -fsSL -o /tmp/nodesource_setup.sh https://deb.nodesource.com/setup_22.x; \
    bash /tmp/nodesource_setup.sh; \
    rm -f /tmp/nodesource_setup.sh; \
    apt-get install -y --no-install-recommends nodejs; \
    corepack enable; \
    rm -rf /var/lib/apt/lists/*

# Unprivileged `runner` account (home dir created, bash login shell).
# It gets passwordless sudo so workflows can still run
# `sudo apt-get install ...` for one-off deps — safe inside the
# container's isolated rootfs. The sudoers drop-in is made read-only
# (0440).
RUN set -e; \
    useradd -m -s /bin/bash runner; \
    echo 'runner ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/runner; \
    chmod 0440 /etc/sudoers.d/runner

# Stage the actions/runner tarball into /opt/runner-template. The
# entrypoint copies this into the runtime work dir on first start so
# image rebuilds don't clobber persisted registration state.
#
# Steps: reject an empty RUNNER_VERSION up front (it would otherwise
# surface as an opaque download failure), map the Debian architecture
# name to the runner's release naming, download + unpack the release,
# run the runner's own dependency installer, then hand the tree to the
# `runner` user.
RUN set -eux; \
    [ -n "${RUNNER_VERSION}" ] || { echo "RUNNER_VERSION must not be empty" >&2; exit 1; }; \
    ARCH="$(dpkg --print-architecture)"; \
    case "$ARCH" in \
        amd64) RARCH=x64 ;; \
        arm64) RARCH=arm64 ;; \
        *) echo "unsupported arch: $ARCH" >&2; exit 1 ;; \
    esac; \
    mkdir -p /opt/runner-template; \
    curl -fsSL -o /tmp/runner.tar.gz \
        "https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-${RARCH}-${RUNNER_VERSION}.tar.gz"; \
    tar -xzf /tmp/runner.tar.gz -C /opt/runner-template; \
    rm /tmp/runner.tar.gz; \
    /opt/runner-template/bin/installdependencies.sh; \
    # NOTE(review): installdependencies.sh is expected to refresh the apt \
    # index; drop it again so this layer matches the other apt layers. \
    rm -rf /var/lib/apt/lists/*; \
    chown -R runner:runner /opt/runner-template

# Install the entrypoint script and pre-create the runtime work dir,
# owned by the `runner` user.
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN set -e; \
    chmod +x /usr/local/bin/entrypoint.sh; \
    mkdir -p /home/runner/runner; \
    chown runner:runner /home/runner /home/runner/runner

# No USER directive on purpose: the entrypoint starts as root so it can
# chown the named-volume mountpoint (Docker creates fresh named volumes
# as root:root regardless of the image's directory ownership). It then
# drops to the `runner` user before exec'ing the GitHub Actions runner.
WORKDIR /home/runner/runner

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
66 changes: 66 additions & 0 deletions scripts/docker/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env bash
#
# Container entrypoint for a self-hosted GitHub Actions runner.
#
# First start: copies the staged runner binaries from /opt/runner-template
# into the persistent work dir, registers with the org using
# RUNNER_TOKEN, then execs `./run.sh`.
#
# Subsequent starts: skips config, reconnects with the persisted
# `.runner` / `.credentials` files. RUNNER_TOKEN is only needed for the
# first start (a registration token expires after 1h; once registered,
# the runner authenticates with its persisted credentials instead).
#
# Required env:
# RUNNER_ORG — GitHub org (e.g. wavekat)
# RUNNER_NAME — unique name for this runner
# RUNNER_LABELS — comma-separated labels
# RUNNER_TOKEN — registration token (first start only)
#
# Optional:
# RUNNER_GROUP — runner group (default: Default)

set -o errexit -o nounset -o pipefail

# Mandatory settings: abort with a message if any is unset or empty.
: "${RUNNER_ORG:?RUNNER_ORG is required}"
: "${RUNNER_NAME:?RUNNER_NAME is required}"
: "${RUNNER_LABELS:?RUNNER_LABELS is required}"

# Optional runner group, falling back to "Default".
: "${RUNNER_GROUP:=Default}"
WORK_DIR="/home/runner/runner"

# First-boot fix: Docker creates fresh named volumes owned by root:root,
# so the `runner` user can't write into the mountpoint until it has been
# chown'ed. When running as root: fix the ownership, then re-exec this
# same script as `runner` (with runner's HOME), forwarding only the
# RUNNER_* variables the script consumes.
if [[ "$(id -u)" -eq 0 ]]; then
  chown -R runner:runner "${WORK_DIR}"
  sudo_args=(
    "--preserve-env=RUNNER_ORG,RUNNER_NAME,RUNNER_LABELS,RUNNER_TOKEN,RUNNER_GROUP"
    -u runner
    -H
  )
  exec sudo "${sudo_args[@]}" /usr/local/bin/entrypoint.sh "$@"
fi

# Seed the persistent work dir from the image's staged runner template
# unless it already contains the runner (detected via config.sh).
[[ -f "${WORK_DIR}/config.sh" ]] || cp -a /opt/runner-template/. "${WORK_DIR}/"

cd "${WORK_DIR}"

# Register only when no `.runner` file exists yet. Registration needs
# RUNNER_TOKEN; later starts reuse the persisted registration and skip
# this block entirely.
if [[ ! -f .runner ]]; then
  if [[ -z "${RUNNER_TOKEN:-}" ]]; then
    echo "first start but RUNNER_TOKEN is empty — cannot register" >&2
    exit 1
  fi
  config_args=(
    --unattended
    --replace
    --url "https://github.com/${RUNNER_ORG}"
    --token "${RUNNER_TOKEN}"
    --name "${RUNNER_NAME}"
    --runnergroup "${RUNNER_GROUP}"
    --labels "${RUNNER_LABELS}"
    --work _work
  )
  ./config.sh "${config_args[@]}"
fi

# `exec` replaces this shell with run.sh, so signals delivered to this
# process (e.g. SIGTERM on container stop) reach the runner directly.
# Do NOT call `./config.sh remove` here — the registration is meant to
# persist across container restarts.
exec ./run.sh
164 changes: 164 additions & 0 deletions scripts/setup-gha-runners-docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#!/usr/bin/env bash
#
# Install N self-hosted GitHub Actions runners on a single Linux host
# as Docker containers, one runner per container, managed by systemd.
#
# Each container has its own rootfs, so apt locks / port conflicts / tmp
# clashes between parallel jobs are no longer possible. Registration
# state is persisted in a per-container Docker volume so host reboots
# don't require re-registration.
#
# Usage (run on the target machine, as a user with sudo):
#
# # Easiest: let the script fetch a registration token via gh CLI.
# # (`gh auth login` once with an account that has wavekat org admin)
# ./setup-gha-runners-docker.sh
#
# # Or pass a token explicitly (valid 1h, can register multiple runners):
# RUNNER_TOKEN=AAAA... ./setup-gha-runners-docker.sh
#
# # Override defaults:
# RUNNER_COUNT=6 RUNNER_PREFIX=aoc-m3l RUNNER_LABELS=aoc-m3l,gpu \
# ./setup-gha-runners-docker.sh
#
# Re-running is safe: existing containers/services are torn down and
# re-registered. The registration token is only consumed on the first
# start of a runner — subsequent restarts use the cached credentials.

set -o errexit -o nounset -o pipefail

# Tunables; each one can be overridden through the environment.
ORG="${RUNNER_ORG:-wavekat}"          # GitHub org the runners register to
COUNT="${RUNNER_COUNT:-4}"            # number of runner containers
if [[ -n "${RUNNER_PREFIX:-}" ]]; then
  PREFIX="${RUNNER_PREFIX}"           # runner names are <PREFIX>-<n>
else
  PREFIX="$(hostname -s)"
fi
RUNNER_LABELS="${RUNNER_LABELS:-wavekat-ci,${PREFIX}}"
IMAGE="${RUNNER_IMAGE:-wavekat/gha-runner:latest}"

# The docker build context lives in ./docker next to this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DOCKER_CONTEXT="${SCRIPT_DIR}/docker"

# Console helpers. Each prints all of its arguments, joined by spaces,
# behind a coloured marker.

# log: progress message on stdout (cyan "==>").
log() {
  printf '\033[1;36m==>\033[0m %s\n' "$*"
}

# warn: non-fatal problem on stderr (yellow "!!").
warn() {
  printf '\033[1;33m!!\033[0m %s\n' "$*" 1>&2
}

# die: fatal error on stderr (red "xx"), then exit with status 1.
die() {
  printf '\033[1;31mxx\033[0m %s\n' "$*" 1>&2
  exit 1
}

# Preconditions: Linux host, docker build context present, sane count.
[[ "$(uname -s)" == "Linux" ]] || die "this script targets Linux (got $(uname -s))"
[[ -d "${DOCKER_CONTEXT}" ]] || die "missing docker context at ${DOCKER_CONTEXT}"
# COUNT drives the runner loops further down. A non-numeric or zero
# value would make those loops run zero times while the script still
# reports success, so reject it here.
[[ "${COUNT}" =~ ^[1-9][0-9]*$ ]] \
  || die "RUNNER_COUNT must be a positive integer (got '${COUNT}')"

# 1. Ensure Docker is installed.
if ! command -v docker >/dev/null 2>&1; then
  log "installing docker (docker.io from Ubuntu archive)"
  sudo apt-get update -y
  sudo apt-get install -y --no-install-recommends docker.io
  sudo systemctl enable --now docker
fi

# 2. Build the runner image.
log "building runner image ${IMAGE}"
sudo docker build -t "${IMAGE}" "${DOCKER_CONTEXT}"

# 3. Fetch a registration token (one token can register multiple runners
#    within its 1h validity window).
#######################################
# Print a runner registration token on stdout.
# Uses RUNNER_TOKEN when it is set and non-empty; otherwise asks the
# GitHub API through the `gh` CLI for a token for ${ORG}.
# Globals:   RUNNER_TOKEN (read, optional), ORG (read)
# Outputs:   the token on stdout (no trailing newline when taken from
#            RUNNER_TOKEN); guidance and errors on stderr
# Returns:   exits 1 when `gh` is missing or the API call fails
#######################################
get_token() {
  if [[ -n "${RUNNER_TOKEN:-}" ]]; then
    printf '%s' "${RUNNER_TOKEN}"
    return
  fi
  if ! command -v gh >/dev/null 2>&1; then
    # Unquoted heredoc so the help text names the configured org rather
    # than a hard-coded one; backticks are escaped to stay literal.
    cat >&2 <<EOF

No RUNNER_TOKEN set, and \`gh\` CLI is not installed.

Install gh on Ubuntu and re-run, or fetch a token elsewhere:

sudo apt-get install -y gh
gh auth login # use an account with ${ORG} org admin

# or, from any machine with gh authed as a ${ORG} admin:
gh api -X POST /orgs/${ORG}/actions/runners/registration-token --jq .token
RUNNER_TOKEN=<token> ./setup-gha-runners-docker.sh
EOF
    exit 1
  fi
  gh api -X POST "/orgs/${ORG}/actions/runners/registration-token" --jq .token \
    || die "failed to fetch registration token (is gh authed as a ${ORG} admin?)"
}

TOKEN="$(get_token)"
[[ -n "${TOKEN}" ]] || die "got empty registration token"

# 4. Install a systemd template unit. One container per instance, each
#    with its own named volume so the registration persists across
#    restarts and host reboots.
#
#    The RUNNER_* settings come from /etc/gha-runner/<instance>.env via
#    EnvironmentFile and are handed to the container with value-less
#    `-e NAME` flags, which make docker copy each variable from its own
#    environment. This keeps the registration token off the `docker run`
#    command line, where it would show up in the host's process list.
#
#    In the heredoc, `\\` emits a single backslash (systemd line
#    continuation) and ${IMAGE} is expanded now, when the unit is written.
UNIT_PATH="/etc/systemd/system/gha-runner@.service"
log "writing ${UNIT_PATH}"
sudo tee "${UNIT_PATH}" >/dev/null <<EOF
[Unit]
Description=GitHub Actions runner (container %i)
After=docker.service network-online.target
Requires=docker.service
Wants=network-online.target

[Service]
Type=simple
EnvironmentFile=/etc/gha-runner/%i.env
ExecStartPre=-/usr/bin/docker rm -f gha-runner-%i
ExecStart=/usr/bin/docker run --rm \\
  --name gha-runner-%i \\
  --hostname gha-runner-%i \\
  -v gha-runner-%i:/home/runner/runner \\
  -e RUNNER_ORG \\
  -e RUNNER_NAME \\
  -e RUNNER_LABELS \\
  -e RUNNER_TOKEN \\
  ${IMAGE}
ExecStop=/usr/bin/docker stop --time=120 gha-runner-%i
Restart=always
RestartSec=10
TimeoutStopSec=180

[Install]
WantedBy=multi-user.target
EOF

# Directory for the per-runner env files (they hold the token).
sudo mkdir -p /etc/gha-runner
sudo chmod 0750 /etc/gha-runner

# 5. (Re)create N runners: tear down any previous instance and write the
#    per-runner env file read by the systemd unit.
for ((i = 1; i <= COUNT; i++)); do
  NAME="${PREFIX}-${i}"
  UNIT="gha-runner@${i}.service"
  ENV_FILE="/etc/gha-runner/${i}.env"
  log "configuring runner ${NAME}"

  # Stop any previous instance and wipe its volume so the fresh
  # registration token can be applied cleanly. The stop is issued
  # unconditionally (failure ignored when the unit does not exist):
  # probing with `systemctl list-unit-files | grep -q` is unreliable
  # under `pipefail` (grep -q may close the pipe early), and template
  # instances may not appear in that listing at all. A service left
  # running with Restart=always could bring the container back while it
  # is being removed.
  if systemctl is-active --quiet "${UNIT}"; then
    warn "existing service for ${i} found — stopping"
  fi
  sudo systemctl stop "${UNIT}" 2>/dev/null || true
  # Best effort: the container/volume may simply not exist yet.
  sudo docker rm -f "gha-runner-${i}" >/dev/null 2>&1 || true
  sudo docker volume rm "gha-runner-${i}" >/dev/null 2>&1 || true

  # Create the file with mode 0600 *before* the token is written, so it
  # never exists with default permissions.
  sudo install -m 0600 /dev/null "${ENV_FILE}"
  sudo tee "${ENV_FILE}" >/dev/null <<EOF
RUNNER_ORG=${ORG}
RUNNER_NAME=${NAME}
RUNNER_LABELS=${RUNNER_LABELS}
RUNNER_TOKEN=${TOKEN}
EOF
done

# 6. Reload systemd (the template unit was just rewritten) and start
#    every instance, enabling it for boot as well.
sudo systemctl daemon-reload
for ((i = 1; i <= COUNT; i++)); do
  log "starting gha-runner@${i}"
  sudo systemctl enable --now "gha-runner@${i}.service"
done

# NOTE: we intentionally do NOT scrub RUNNER_TOKEN from the env files
# after start. Scrubbing it would make systemd's auto-restart unable to
# recover if config.sh failed on the first attempt (no token in the env
# file → no way to register). The token is not single-use — step 3
# relies on one token registering every runner — but it expires after
# 1h and the env files are created root-only (0600), which bounds the
# exposure.

log "done — ${COUNT} runner(s) registered to ${ORG}"
log "check status: systemctl list-units 'gha-runner@*.service'"
log "live logs: journalctl -u 'gha-runner@1.service' -f"
# This script only ever talks to docker through sudo, so the hint does too.
log "container shells: sudo docker exec -it gha-runner-1 bash"
79 changes: 79 additions & 0 deletions scripts/uninstall-gha-runners-docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env bash
#
# Tear down Docker-based self-hosted GitHub Actions runners installed
# by setup-gha-runners-docker.sh. Stops the systemd services, removes
# the containers and volumes, and de-registers each runner from the
# `wavekat` org.
#
# Usage:
# ./uninstall-gha-runners-docker.sh
# RUNNER_TOKEN=AAAA... ./uninstall-gha-runners-docker.sh # remove-token
#
# A *remove* token can be fetched via:
# gh api -X POST /orgs/wavekat/actions/runners/remove-token --jq .token

set -o errexit -o nounset -o pipefail

# Tunables; each one can be overridden through the environment. They
# must match the values used at install time for the runner names and
# instance numbers to line up.
ORG="${RUNNER_ORG:-wavekat}"
COUNT="${RUNNER_COUNT:-4}"
if [[ -n "${RUNNER_PREFIX:-}" ]]; then
  PREFIX="${RUNNER_PREFIX}"
else
  PREFIX="$(hostname -s)"
fi

# Console helpers. Each prints all of its arguments, joined by spaces,
# behind a coloured marker.

# log: progress message on stdout (cyan "==>").
log() {
  printf '\033[1;36m==>\033[0m %s\n' "$*"
}

# warn: non-fatal problem on stderr (yellow "!!").
warn() {
  printf '\033[1;33m!!\033[0m %s\n' "$*" 1>&2
}

# die: fatal error on stderr (red "xx"), then exit with status 1.
die() {
  printf '\033[1;31mxx\033[0m %s\n' "$*" 1>&2
  exit 1
}

#######################################
# Print a runner *remove* token on stdout.
# Uses RUNNER_TOKEN when it is set and non-empty; otherwise asks the
# GitHub API through the `gh` CLI for a remove-token for ${ORG}.
# Globals:   RUNNER_TOKEN (read, optional), ORG (read)
# Outputs:   the token on stdout (no trailing newline when taken from
#            RUNNER_TOKEN); guidance and errors on stderr
# Returns:   exits 1 when `gh` is missing or the API call fails
#######################################
get_token() {
  if [[ -n "${RUNNER_TOKEN:-}" ]]; then
    printf '%s' "${RUNNER_TOKEN}"
    return
  fi
  if ! command -v gh >/dev/null 2>&1; then
    # Unquoted heredoc so the help text names the configured org rather
    # than a hard-coded one; backticks are escaped to stay literal.
    cat >&2 <<EOF

No RUNNER_TOKEN set, and \`gh\` CLI is not installed.

Install gh on Ubuntu and re-run, or fetch a remove-token elsewhere:

sudo apt-get install -y gh
gh auth login

# or, from any machine with gh authed as a ${ORG} admin:
gh api -X POST /orgs/${ORG}/actions/runners/remove-token --jq .token
RUNNER_TOKEN=<token> ./uninstall-gha-runners-docker.sh
EOF
    exit 1
  fi
  # Fail with an explicit message instead of relying on `set -e` alone.
  gh api -X POST "/orgs/${ORG}/actions/runners/remove-token" --jq .token \
    || die "failed to fetch remove token (is gh authed as a ${ORG} admin?)"
}

TOKEN="$(get_token)"
[[ -n "${TOKEN}" ]] || die "got empty remove token"

for ((i = 1; i <= COUNT; i++)); do
  NAME="${PREFIX}-${i}"
  CONTAINER="gha-runner-${i}"
  log "removing runner ${NAME}"

  # De-register inside the container before tearing it down (best effort —
  # if the container/volume is already gone the runner is effectively
  # orphaned and will need to be removed from the GitHub UI manually).
  #
  # Container state is read with `docker container inspect` rather than
  # `docker ps | grep -q`, which can misreport under `pipefail` when
  # grep closes the pipe early.
  running="$(sudo docker container inspect --format '{{.State.Running}}' \
    "${CONTAINER}" 2>/dev/null || true)"
  if [[ "${running}" == "true" ]]; then
    # Run as `runner`, the account that owns the work dir and performed
    # the registration. Without -u, `docker exec` uses the container's
    # default user (root), which the runner's config.sh rejects unless
    # RUNNER_ALLOW_RUNASROOT is set. The token is passed as a plain
    # argument instead of being spliced into a `bash -c` string.
    sudo docker exec -u runner -w /home/runner/runner "${CONTAINER}" \
      ./config.sh remove --token "${TOKEN}" \
      || warn "in-container de-register failed for ${NAME} (will be removed locally regardless)"
  else
    warn "container ${CONTAINER} is not running — skipping de-register for ${NAME}"
  fi

  # Local teardown is best effort: any of these may already be gone.
  sudo systemctl disable --now "gha-runner@${i}.service" 2>/dev/null || true
  sudo docker rm -f "${CONTAINER}" >/dev/null 2>&1 || true
  sudo docker volume rm "${CONTAINER}" >/dev/null 2>&1 || true
  sudo rm -f "/etc/gha-runner/${i}.env"
done

# Remove the template unit only if systemd no longer lists any
# gha-runner@ instance (e.g. none left over from an install with a
# larger RUNNER_COUNT). The listing is captured first instead of being
# piped into `grep -q`: under `pipefail`, a writer cut off by grep's
# early exit would make `! pipeline` succeed and delete the template
# while instances still exist.
remaining="$(systemctl list-units --all 'gha-runner@*.service' --no-legend || true)"
if [[ -z "${remaining//[[:space:]]/}" ]]; then
  log "removing systemd template unit"
  sudo rm -f /etc/systemd/system/gha-runner@.service
  # rmdir only succeeds when no env files are left behind.
  sudo rmdir /etc/gha-runner 2>/dev/null || true
  sudo systemctl daemon-reload
fi

log "done"
Loading