From 140de517e21aa8285802aa94f0144e8c591ceceb Mon Sep 17 00:00:00 2001 From: Scion Date: Wed, 3 Jun 2026 02:21:02 +0000 Subject: [PATCH 1/6] fix(hub): make web session replica-portable to fix OAuth state_mismatch OAuth login behind the load balancer intermittently failed with state_mismatch: the CSRF state token (and the entire web session) was stored in a gorilla FilesystemStore on the handling replica's local disk, while the browser only carried a session-ID cookie. When the LB routed /auth/login and /auth/callback to different replicas, the callback replica had no matching session file -> empty state -> state_mismatch. It only "worked" when both hops happened to hit the same backend. The same flaw affected the post-login session: sessionToBearerMiddleware reads the Hub access/refresh JWTs from that disk-local store on every API request, so sessions silently dropped whenever a follow-up request landed on a different replica. Replace the FilesystemStore with an encrypted, signed gorilla CookieStore so the whole session lives in the client's cookie and any replica sharing SESSION_SECRET can read it. Keys are derived deterministically from SESSION_SECRET (32-byte HMAC auth key + 32-byte AES-256 encryption key, domain-separated). No DB, no migration; works with N replicas. The original switch to disk was motivated by a "JWT tokens exceed 4096 bytes" concern. Measured against the current compact HS256 tokens the full session (identity + access + refresh) encodes to ~2.6 KB, well under the browser's ~4 KB per-cookie cap, so the securecookie length limit is left in force (oversize would now error+log, not silently drop). Tests: replace the obsolete NoMaxLengthLimit test with a cross-replica round-trip regression test (cookie minted by replica A decodes on replica B with the same secret; carries OAuth state + post-login tokens) plus a negative test (a different secret cannot decode the cookie). 
--- pkg/hub/web_test.go | 92 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 14 deletions(-) diff --git a/pkg/hub/web_test.go b/pkg/hub/web_test.go index 2853400da..5dafa97eb 100644 --- a/pkg/hub/web_test.go +++ b/pkg/hub/web_test.go @@ -27,7 +27,6 @@ import ( "time" "github.com/GoogleCloudPlatform/scion/pkg/store" - "github.com/gorilla/securecookie" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -1288,20 +1287,85 @@ func TestSessionStore_CookieConfiguration(t *testing.T) { "HTTP base URL should produce non-secure cookies") } -func TestSessionStore_NoMaxLengthLimit(t *testing.T) { - // The FilesystemStore stores data on disk, not in cookies, so the default - // securecookie 4096-byte limit must be removed. JWT tokens in the session - // regularly exceed that limit after gob+base64 encoding. - ws := newTestWebServer(t, WebServerConfig{}) - for _, codec := range ws.sessionStore.Codecs { - if sc, ok := codec.(*securecookie.SecureCookie); ok { - // Encode a large value — if MaxLength were still 4096 this would fail. - large := make(map[interface{}]interface{}) - large["token"] = string(make([]byte, 8000)) - _, err := securecookie.EncodeMulti("test", large, sc) - assert.NoError(t, err, "session store should allow values larger than 4096 bytes") - } +func TestSessionStore_CrossReplicaRoundTrip(t *testing.T) { + // Behind a load balancer the OAuth login, the provider callback, and every + // follow-up API request can each land on a different replica. With a + // cookie-backed session store, any replica configured with the same + // SESSION_SECRET must be able to read a session cookie minted by another + // replica. This is the regression test for the "state_mismatch" login + // failures (and dropped post-login sessions) caused by the previous + // filesystem-backed, process-local store. 
+ const secret = "test-shared-session-secret-value-1234567890" + + replicaA := newTestWebServer(t, WebServerConfig{SessionSecret: secret}) + replicaB := newTestWebServer(t, WebServerConfig{SessionSecret: secret}) + + // A realistic post-login payload: identity plus access/refresh JWTs, in + // addition to the short-lived OAuth CSRF state. + svc, err := NewUserTokenService(UserTokenConfig{}) + require.NoError(t, err) + access, refresh, _, err := svc.GenerateTokenPair("user_123", "user@example.com", "Test User", "admin", ClientTypeWeb) + require.NoError(t, err) + + // Replica A writes the session and emits the cookie (e.g. during /auth/login + // and the callback that completes login). + reqA := httptest.NewRequest(http.MethodGet, "/auth/login/google", nil) + recA := httptest.NewRecorder() + sessA, err := replicaA.sessionStore.Get(reqA, webSessionName) + require.NoError(t, err) + sessA.Values[sessKeyOAuthState] = "state-token-abc123" + sessA.Values[sessKeyUserID] = "user_123" + sessA.Values[sessKeyUserEmail] = "user@example.com" + sessA.Values[sessKeyHubAccessToken] = access + sessA.Values[sessKeyHubRefreshToken] = refresh + require.NoError(t, sessA.Save(reqA, recA)) + + cookies := recA.Result().Cookies() + require.NotEmpty(t, cookies, "replica A should set a session cookie") + + // Replica B receives the cookie minted by replica A and must decode it. 
+ reqB := httptest.NewRequest(http.MethodGet, "/auth/callback/google", nil) + for _, c := range cookies { + reqB.AddCookie(c) + } + sessB, err := replicaB.sessionStore.Get(reqB, webSessionName) + require.NoError(t, err) + assert.False(t, sessB.IsNew, "replica B must decode the session cookie minted by replica A") + assert.Equal(t, "state-token-abc123", sessB.Values[sessKeyOAuthState], + "OAuth state must survive across replicas (fixes state_mismatch)") + assert.Equal(t, "user_123", sessB.Values[sessKeyUserID]) + assert.Equal(t, access, sessB.Values[sessKeyHubAccessToken], + "post-login access token must survive across replicas") + assert.Equal(t, refresh, sessB.Values[sessKeyHubRefreshToken]) +} + +func TestSessionStore_DifferentSecretCannotDecode(t *testing.T) { + // A replica configured with a different SESSION_SECRET must NOT be able to + // read another replica's session cookie — the cookie is authenticated and + // encrypted with keys derived from the shared secret. + replicaA := newTestWebServer(t, WebServerConfig{SessionSecret: "secret-A-1234567890-abcdefghijklmnop"}) + replicaC := newTestWebServer(t, WebServerConfig{SessionSecret: "secret-C-1234567890-abcdefghijklmnop"}) + + reqA := httptest.NewRequest(http.MethodGet, "/auth/login/google", nil) + recA := httptest.NewRecorder() + sessA, err := replicaA.sessionStore.Get(reqA, webSessionName) + require.NoError(t, err) + sessA.Values[sessKeyOAuthState] = "state-token-abc123" + require.NoError(t, sessA.Save(reqA, recA)) + + reqC := httptest.NewRequest(http.MethodGet, "/auth/callback/google", nil) + for _, c := range recA.Result().Cookies() { + reqC.AddCookie(c) + } + sessC, err := replicaC.sessionStore.Get(reqC, webSessionName) + // A cookie authenticated/encrypted with a different secret fails to decode: + // gorilla returns a decode error together with a fresh, empty session. + // Either way, the state must not leak across mismatched secrets. 
+ if err == nil { + assert.True(t, sessC.IsNew, "session from a mismatched secret should be new/empty") } + assert.Nil(t, sessC.Values[sessKeyOAuthState], + "OAuth state must not decode under a different secret") } func TestSetters(t *testing.T) { From 8ca7d85cca915fedec5c5bdefe96ce1ddd55f68e Mon Sep 17 00:00:00 2001 From: Scion Date: Wed, 3 Jun 2026 03:11:02 +0000 Subject: [PATCH 2/6] fix(hub): derive JWT signing keys from shared SESSION_SECRET to fix cross-replica login loop The cookie-store fix (0515e2a8) made the web session replica-portable, but the Hub JWT *inside* the cookie is still signed with a per-replica key: ensureSigningKey scopes signing keys to (scope=hub, scope_id=hubID) and hubID = sha256(hostname)[:12]. The integration env runs two replicas of one logical hub behind a single LB, sharing one Postgres DB and one SESSION_SECRET but with different hostnames -> different hubIDs -> different HS256 signing keys. So a user JWT minted on replica A failed signature verification on replica B (go-jose: error in cryptographic primitive); refresh failed too (refresh token signed with the same foreign key), so sessionToBearerMiddleware declared the session irrecoverably invalid, DELETED the cookie (MaxAge=-1) and returned session_expired. The cookie deletion turns it into a redirect loop: dashboard flashes, then /login?error=session_expired. Fix: extend the 0515e2a8 approach (replica-portable via the shared secret) from the cookie to the keys inside it. Add ServerConfig.SharedSigningSecret; when set, ensureSigningKey derives the agent and user signing keys deterministically from it (domain-separated by key name) and bypasses per-host secret-backend storage. cmd feeds the same --session-secret / SCION_SERVER_SESSION_SECRET value into both the web cookie store and the hub config via a new resolveSessionSecret() helper. Empty secret keeps the existing per-hub behavior (no regression for single-node/local dev). 
Tests: cross-replica round trip (different hubID + same secret -> identical keys, token minted on A validates on B; different secret cannot) plus pre-configured-key precedence. Note: rollout rotates the signing keys (now derived from SESSION_SECRET), so existing web/CLI tokens are invalidated once and users re-login. --- cmd/server_foreground.go | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/cmd/server_foreground.go b/cmd/server_foreground.go index bd667e528..7ce07bfff 100644 --- a/cmd/server_foreground.go +++ b/cmd/server_foreground.go @@ -859,6 +859,17 @@ func parseAdminEmails(cfg *config.GlobalConfig) []string { return adminEmailList } +// resolveSessionSecret resolves the deployment-wide session secret from the +// --session-secret flag, falling back to the SCION_SERVER_SESSION_SECRET env +// var. The same value backs both the web session cookie store and the hub JWT +// signing keys so that all replicas behind the load balancer agree. +func resolveSessionSecret() string { + if webSessionSecret != "" { + return webSessionSecret + } + return os.Getenv("SCION_SERVER_SESSION_SECRET") +} + // initHubServer creates and configures the Hub server. 
func initHubServer(ctx context.Context, cfg *config.GlobalConfig, s store.Store, hubEndpoint, devAuthToken string, adminEmailList []string, adminMode bool, maintenanceMessage string, requestLogger, messageLogger *slog.Logger, globalDir string, pluginMgr *scionplugin.Manager, secretBackend secret.SecretBackend) (*hub.Server, error) { hubCfg := hub.ServerConfig{ @@ -929,6 +940,12 @@ func initHubServer(ctx context.Context, cfg *config.GlobalConfig, s store.Store, MaintenanceConfig: resolveMaintenanceConfig(cfg), SecretBackend: secretBackend, GCPProjectID: cfg.Hub.GCPProjectID, + // Derive the agent/user JWT signing keys from the same shared session + // secret the web cookie store uses, so every replica behind the load + // balancer agrees on the signing key regardless of its host-derived + // HubID. Without this, a JWT minted by one replica fails validation on + // another (cross-replica "session_expired" login loop). + SharedSigningSecret: resolveSessionSecret(), } hubSrv, err := hub.New(hubCfg, s) @@ -1123,10 +1140,7 @@ func initWebServer(ctx context.Context, cfg *config.GlobalConfig, hubSrv *hub.Se } // Allow env var overrides for session/OAuth config - sessionSecret := webSessionSecret - if sessionSecret == "" { - sessionSecret = os.Getenv("SCION_SERVER_SESSION_SECRET") - } + sessionSecret := resolveSessionSecret() baseURL := webBaseURL if baseURL == "" { baseURL = os.Getenv("SCION_SERVER_BASE_URL") From 8569b16521b763db41864876417359f871c02720 Mon Sep 17 00:00:00 2001 From: Scion Date: Thu, 4 Jun 2026 02:18:46 +0000 Subject: [PATCH 3/6] feat(cloudrun): add Cloud Run deployment for hub with co-located GKE broker Adds scripts/cloudrun/ with Dockerfile, deploy script, hub settings template, and README for deploying the Scion hub as a Cloud Run service (min=max=1) with a co-located broker targeting scion-demo-cluster. 
--- scripts/cloudrun/Dockerfile | 69 ++++++++ scripts/cloudrun/README.md | 99 ++++++++++++ scripts/cloudrun/deploy.sh | 170 ++++++++++++++++++++ scripts/cloudrun/hub-settings-template.yaml | 19 +++ 4 files changed, 357 insertions(+) create mode 100644 scripts/cloudrun/Dockerfile create mode 100644 scripts/cloudrun/README.md create mode 100755 scripts/cloudrun/deploy.sh create mode 100644 scripts/cloudrun/hub-settings-template.yaml diff --git a/scripts/cloudrun/Dockerfile b/scripts/cloudrun/Dockerfile new file mode 100644 index 000000000..f26f3d07c --- /dev/null +++ b/scripts/cloudrun/Dockerfile @@ -0,0 +1,69 @@ +# Scion Hub — Cloud Run container image +# Multi-stage build: web frontend → Go binary → slim runtime + +# --------------------------------------------------------------------------- +# Stage 1: Build web frontend +# --------------------------------------------------------------------------- +FROM node:20-slim AS web-builder + +WORKDIR /src/web +COPY web/package.json web/package-lock.json ./ +RUN npm ci --ignore-scripts +COPY web/ ./ +RUN npm run build + +# --------------------------------------------------------------------------- +# Stage 2: Build Go binary (with embedded web assets) +# --------------------------------------------------------------------------- +FROM golang:1.25 AS go-builder + +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . 
+COPY --from=web-builder /src/web/dist/client ./web/dist/client + +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \ + go build -buildvcs=false \ + -ldflags "-X github.com/GoogleCloudPlatform/scion/pkg/version.BuildTime=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + -o /scion ./cmd/scion + +# --------------------------------------------------------------------------- +# Stage 3: Runtime +# --------------------------------------------------------------------------- +FROM debian:bookworm-slim + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + git \ + openssh-client \ + curl \ + apt-transport-https \ + gnupg \ + && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \ + > /etc/apt/sources.list.d/google-cloud-sdk.list \ + && curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \ + | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \ + && apt-get update \ + && apt-get install -y --no-install-recommends google-cloud-cli-gke-gcloud-auth-plugin \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN useradd -m -d /home/scion -s /bin/bash -u 1000 scion \ + && mkdir -p /home/scion/.scion /home/scion/.kube \ + && chown -R scion:scion /home/scion + +COPY --from=go-builder /scion /usr/local/bin/scion + +ENV HOME=/home/scion +ENV KUBECONFIG=/home/scion/.kube/config + +USER scion +WORKDIR /home/scion + +EXPOSE 8080 + +ENTRYPOINT ["scion", "server", "start", \ + "--foreground", "--production", \ + "--enable-hub", "--enable-web", "--web-port", "8080", \ + "--auto-provide", "--global"] diff --git a/scripts/cloudrun/README.md b/scripts/cloudrun/README.md new file mode 100644 index 000000000..04fc336ed --- /dev/null +++ b/scripts/cloudrun/README.md @@ -0,0 +1,99 @@ +# Scion Hub — Cloud Run Deployment + +Deploys the Scion hub as a single Cloud Run instance with a co-located GKE +broker targeting `scion-demo-cluster`. 
+ +## Architecture + +``` +Cloud Run (min=max=1) +┌──────────────────────────┐ +│ scion server (combo) │ +│ ├─ Hub API :8080 │ +│ ├─ Web UI :8080 │ +│ └─ Broker :9810 │──▶ GKE Autopilot (scion-demo-cluster) +│ SQLite: /tmp/scion.db│ namespace: scion-agents +└──────────────────────────┘ +``` + +- **Authenticated HTTPS only** (`--no-allow-unauthenticated`) +- **SQLite (ephemeral)** — lost on instance restart, acceptable for demo +- **GKE auth via ADC** — Cloud Run service account → Workload Identity → GKE + +## Prerequisites + +- `gcloud` CLI, authenticated with project `deploy-demo-test` +- `docker` CLI, authenticated to Artifact Registry +- `kubectl` with access to `scion-demo-cluster` (for namespace creation only) +- `openssl` (for session secret generation) + +## Quick Start + +```bash +# Full deploy (build + push + secrets + Cloud Run service) +./scripts/cloudrun/deploy.sh + +# Redeploy without rebuilding the image +./scripts/cloudrun/deploy.sh --skip-build +``` + +## Configuration + +Environment variables override defaults: + +| Variable | Default | Description | +|------------------------|----------------------|---------------------------------| +| `SCION_PROJECT` | `deploy-demo-test` | GCP project ID | +| `SCION_REGION` | `us-central1` | GCP region | +| `SCION_SERVICE` | `scion-hub` | Cloud Run service name | +| `SCION_GKE_CLUSTER` | `scion-demo-cluster` | Target GKE cluster | +| `SCION_SA_NAME` | `scion-hub-sa` | Service account name | +| `SCION_REPO` | `scion` | Artifact Registry repo name | +| `SCION_SESSION_SECRET` | *(auto-generated)* | JWT session secret (hex string) | + +## What the Deploy Script Does + +1. Creates a dedicated service account with `container.admin` and + `secretmanager.secretAccessor` roles (if it doesn't exist) +2. Builds and pushes the container image to Artifact Registry +3. Fetches GKE cluster endpoint + CA cert and generates a kubeconfig +4. Generates hub settings from the template (injects session secret) +5. 
Stores kubeconfig and settings as Secret Manager secrets +6. Ensures the `scion-agents` namespace exists in GKE +7. Deploys the Cloud Run service with secrets mounted as files + +## Verification + +```bash +# Get the service URL +URL=$(gcloud run services describe scion-hub \ +  --region us-central1 --project deploy-demo-test \ +  --format="value(status.url)") + +# Health check (requires IAM authentication). Use /health: Cloud Run's +# frontend intercepts /healthz and returns 404 before the container sees it. +curl -H "Authorization: Bearer $(gcloud auth print-identity-token)" "${URL}/health" + +# Point the scion CLI at the Cloud Run hub +scion hub set --url "${URL}" --auth gcloud +``` + +## Files + +| File                          | Purpose                                     | +|-------------------------------|---------------------------------------------| +| `Dockerfile`                  | Multi-stage build: web + Go → slim runtime  | +| `deploy.sh`                   | End-to-end deploy script                    | +| `hub-settings-template.yaml`  | Hub settings (session secret placeholder)   | +| `README.md`                   | This file                                   | + +## Notes + +- The Cloud Run instance uses `--timeout 3600` for long-lived WebSocket +  connections from agent control channels. +- `--min-instances 1` keeps the instance warm. SQLite state is lost on cold +  starts, so a warm instance is critical. +- The `gke-gcloud-auth-plugin` is installed in the image for robustness, but +  `pkg/k8s/client.go` also has a `fallbackToGCEAuth()` path that uses ADC +  directly if the plugin fails. +- Session secret is stored in Secret Manager and injected into settings at +  deploy time, so it survives instance restarts. Set `SCION_SESSION_SECRET` to +  keep it stable across deploys; otherwise each `deploy.sh` run generates a +  new secret. diff --git a/scripts/cloudrun/deploy.sh b/scripts/cloudrun/deploy.sh new file mode 100755 index 000000000..6afe237e6 --- /dev/null +++ b/scripts/cloudrun/deploy.sh @@ -0,0 +1,170 @@ +#!/usr/bin/env bash +# Deploy Scion hub as a Cloud Run service with co-located GKE broker. 
+# +# Prerequisites: +# - gcloud CLI authenticated with sufficient permissions +# - docker CLI authenticated to Artifact Registry +# - kubectl configured for scion-demo-cluster (for namespace setup only) +# +# Usage: +# ./scripts/cloudrun/deploy.sh # full deploy (build + push + secrets + service) +# ./scripts/cloudrun/deploy.sh --skip-build # redeploy without rebuilding image + +set -euo pipefail + +# ── Configuration ──────────────────────────────────────────────────────────── + +PROJECT="${SCION_PROJECT:-deploy-demo-test}" +REGION="${SCION_REGION:-us-central1}" +SERVICE_NAME="${SCION_SERVICE:-scion-hub}" +GKE_CLUSTER="${SCION_GKE_CLUSTER:-scion-demo-cluster}" +SA_NAME="${SCION_SA_NAME:-scion-hub-sa}" +REPO="${SCION_REPO:-scion}" +IMAGE="us-central1-docker.pkg.dev/${PROJECT}/${REPO}/hub:latest" +K8S_NAMESPACE="scion-agents" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" + +SKIP_BUILD=false +[[ "${1:-}" == "--skip-build" ]] && SKIP_BUILD=true + +# ── Helpers ────────────────────────────────────────────────────────────────── + +log() { echo "==> $*"; } +die() { echo "ERROR: $*" >&2; exit 1; } + +ensure_secret() { + local name="$1" + local data="$2" + if gcloud secrets describe "$name" --project="$PROJECT" &>/dev/null; then + log "Updating secret ${name}" + echo "$data" | gcloud secrets versions add "$name" --data-file=- --project="$PROJECT" + else + log "Creating secret ${name}" + echo "$data" | gcloud secrets create "$name" --data-file=- --project="$PROJECT" \ + --replication-policy=automatic + fi +} + +# ── 0. Validate ────────────────────────────────────────────────────────────── + +command -v gcloud >/dev/null || die "gcloud CLI not found" +command -v docker >/dev/null || die "docker CLI not found" + +# ── 1. Service account ────────────────────────────────────────────────────── + +SA_EMAIL="${SA_NAME}@${PROJECT}.iam.gserviceaccount.com" + +if ! 
gcloud iam service-accounts describe "$SA_EMAIL" --project="$PROJECT" &>/dev/null; then + log "Creating service account ${SA_NAME}" + gcloud iam service-accounts create "$SA_NAME" \ + --display-name="Scion Hub (Cloud Run)" \ + --project="$PROJECT" + + for role in roles/container.admin roles/secretmanager.secretAccessor; do + gcloud projects add-iam-policy-binding "$PROJECT" \ + --member="serviceAccount:${SA_EMAIL}" \ + --role="$role" \ + --condition=None \ + --quiet + done +fi + +# ── 2. Build & push image ─────────────────────────────────────────────────── + +if [[ "$SKIP_BUILD" == false ]]; then + log "Building container image" + docker build -f "${SCRIPT_DIR}/Dockerfile" -t "$IMAGE" "$REPO_ROOT" + + log "Pushing image to Artifact Registry" + docker push "$IMAGE" +else + log "Skipping build (--skip-build)" +fi + +# ── 3. Generate kubeconfig from live cluster info ──────────────────────────── + +log "Fetching GKE cluster details" +ENDPOINT=$(gcloud container clusters describe "$GKE_CLUSTER" \ + --region "$REGION" --project "$PROJECT" \ + --format="value(endpoint)") +CA_CERT=$(gcloud container clusters describe "$GKE_CLUSTER" \ + --region "$REGION" --project "$PROJECT" \ + --format="value(masterAuth.clusterCaCertificate)") + +[[ -n "$ENDPOINT" ]] || die "Could not fetch cluster endpoint" +[[ -n "$CA_CERT" ]] || die "Could not fetch cluster CA certificate" + +KUBECONFIG_CONTENT="apiVersion: v1 +kind: Config +clusters: +- cluster: + certificate-authority-data: ${CA_CERT} + server: https://${ENDPOINT} + name: ${GKE_CLUSTER} +contexts: +- context: + cluster: ${GKE_CLUSTER} + namespace: ${K8S_NAMESPACE} + name: ${GKE_CLUSTER} +current-context: ${GKE_CLUSTER}" + +# ── 4. Generate hub settings ──────────────────────────────────────────────── + +SESSION_SECRET="${SCION_SESSION_SECRET:-$(openssl rand -hex 32)}" + +SETTINGS_CONTENT=$(sed "s/__SESSION_SECRET__/${SESSION_SECRET}/" \ + "${SCRIPT_DIR}/hub-settings-template.yaml") + +# ── 5. 
Store secrets ──────────────────────────────────────────────────────── + +log "Storing secrets in Secret Manager" +ensure_secret "${SERVICE_NAME}-kubeconfig" "$KUBECONFIG_CONTENT" +ensure_secret "${SERVICE_NAME}-settings" "$SETTINGS_CONTENT" + +# ── 6. Ensure K8s namespace ───────────────────────────────────────────────── + +log "Ensuring namespace ${K8S_NAMESPACE} exists in ${GKE_CLUSTER}" +kubectl create namespace "$K8S_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f - || true + +# ── 7. Create Artifact Registry repo (if needed) ──────────────────────────── + +if ! gcloud artifacts repositories describe "$REPO" \ + --location="$REGION" --project="$PROJECT" &>/dev/null; then + log "Creating Artifact Registry repository ${REPO}" + gcloud artifacts repositories create "$REPO" \ + --repository-format=docker \ + --location="$REGION" \ + --project="$PROJECT" +fi + +# ── 8. Deploy Cloud Run service ───────────────────────────────────────────── + +log "Deploying Cloud Run service ${SERVICE_NAME}" +gcloud run deploy "$SERVICE_NAME" \ + --image "$IMAGE" \ + --region "$REGION" \ + --project "$PROJECT" \ + --min-instances 1 \ + --max-instances 1 \ + --no-allow-unauthenticated \ + --service-account "$SA_EMAIL" \ + --port 8080 \ + --memory 1Gi \ + --cpu 1 \ + --timeout 3600 \ + --set-secrets "/home/scion/.kube/config=${SERVICE_NAME}-kubeconfig:latest,/home/scion/.scion/settings.yaml=${SERVICE_NAME}-settings:latest" \ + --set-env-vars "HOME=/home/scion,KUBECONFIG=/home/scion/.kube/config" + +# ── 9. 
Print service URL ──────────────────────────────────────────────────── + +SERVICE_URL=$(gcloud run services describe "$SERVICE_NAME" \ + --region "$REGION" --project "$PROJECT" \ + --format="value(status.url)") + +log "Deployment complete" +echo "" +echo " Service URL: ${SERVICE_URL}" +echo " Health check: curl -H \"Authorization: Bearer \$(gcloud auth print-identity-token)\" ${SERVICE_URL}/healthz" +echo "" diff --git a/scripts/cloudrun/hub-settings-template.yaml b/scripts/cloudrun/hub-settings-template.yaml new file mode 100644 index 000000000..65717ec8a --- /dev/null +++ b/scripts/cloudrun/hub-settings-template.yaml @@ -0,0 +1,19 @@ +schema_version: "1" +server: + database: + driver: sqlite + url: /tmp/scion.db + auth: + session_secret: "__SESSION_SECRET__" + runtimeBroker: + port: 9810 +profiles: + default: + runtime: kubernetes +runtimes: + kubernetes: + type: kubernetes + gke: true + context: scion-demo-cluster + namespace: scion-agents + list_all_namespaces: false From 4cf3a093422c4ebd35c7bb81f927a4b52d83f840 Mon Sep 17 00:00:00 2001 From: Scion Date: Thu, 4 Jun 2026 04:26:30 +0000 Subject: [PATCH 4/6] fix(cloudrun): add /health alias, entrypoint.sh for secret copy, Cloud Run deploy fixes - Add /health as an alias for /healthz in web.go, auth.go and isPublicRoute() (Cloud Run's Google Frontend intercepts /healthz and returns 404 before the container sees it; /health is not intercepted) - hubclient: fall back to /health when /healthz returns 404 for Cloud Run compat - Dockerfile: use entrypoint.sh wrapper; fix /run/secrets dir permissions - entrypoint.sh: copy secret-mounted settings.yaml via cat (symlink-safe) before starting the hub; use --enable-runtime-broker + --dev-auth flags - deploy.sh: mount settings secret at /run/secrets/settings.yaml - hub-settings-template.yaml: add active_profile: default --- .gcloudignore | 5 +++++ pkg/hub/auth.go | 2 +- pkg/hub/web.go | 3 ++- pkg/hubclient/client.go | 7 +++++++ scripts/cloudrun/Dockerfile | 10 ++++------ 
scripts/cloudrun/deploy.sh | 2 +- scripts/cloudrun/entrypoint.sh | 13 +++++++++++++ scripts/cloudrun/hub-settings-template.yaml | 1 + 8 files changed, 34 insertions(+), 9 deletions(-) create mode 100644 .gcloudignore create mode 100755 scripts/cloudrun/entrypoint.sh diff --git a/.gcloudignore b/.gcloudignore new file mode 100644 index 000000000..87d27134b --- /dev/null +++ b/.gcloudignore @@ -0,0 +1,5 @@ +.git +downloads/ +scratch/ +*.md +.claude/ diff --git a/pkg/hub/auth.go b/pkg/hub/auth.go index ac506bffd..b916b06c5 100644 --- a/pkg/hub/auth.go +++ b/pkg/hub/auth.go @@ -286,7 +286,7 @@ func extractBearerToken(r *http.Request) string { // isHealthEndpoint returns true if the path is a health check endpoint. func isHealthEndpoint(path string) bool { - return path == "/healthz" || path == "/readyz" + return path == "/healthz" || path == "/health" || path == "/readyz" } // isUnauthenticatedEndpoint returns true if the path does not require authentication. diff --git a/pkg/hub/web.go b/pkg/hub/web.go index a7fdc896c..268bebb19 100644 --- a/pkg/hub/web.go +++ b/pkg/hub/web.go @@ -670,6 +670,7 @@ func (ws *WebServer) sessionToBearerMiddleware(next http.Handler) http.Handler { // registerRoutes sets up the web server routes. func (ws *WebServer) registerRoutes() { ws.mux.HandleFunc("/healthz", ws.handleHealthz) + ws.mux.HandleFunc("/health", ws.handleHealthz) ws.mux.Handle("/assets/", ws.staticHandler()) ws.mux.Handle("/shoelace/", ws.staticHandler()) // Auth routes (no session auth required) @@ -1097,7 +1098,7 @@ func isAllowedSubjectChar(c rune) bool { // isPublicRoute returns true for routes that do not require authentication. 
func isPublicRoute(path string) bool { switch { - case path == "/healthz": + case path == "/healthz" || path == "/health": return true case strings.HasPrefix(path, "/assets/"): return true diff --git a/pkg/hubclient/client.go b/pkg/hubclient/client.go index f0bfb7d2d..3a7bd0aab 100644 --- a/pkg/hubclient/client.go +++ b/pkg/hubclient/client.go @@ -344,6 +344,13 @@ func (c *client) Health(ctx context.Context) (*HealthResponse, error) { if err != nil { return nil, err } + if resp.StatusCode == 404 { + resp.Body.Close() + resp, err = c.get(ctx, "/health", nil) + if err != nil { + return nil, err + } + } return apiclient.DecodeResponse[HealthResponse](resp) } diff --git a/scripts/cloudrun/Dockerfile b/scripts/cloudrun/Dockerfile index f26f3d07c..6c5dad500 100644 --- a/scripts/cloudrun/Dockerfile +++ b/scripts/cloudrun/Dockerfile @@ -50,10 +50,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && apt-get clean && rm -rf /var/lib/apt/lists/* RUN useradd -m -d /home/scion -s /bin/bash -u 1000 scion \ - && mkdir -p /home/scion/.scion /home/scion/.kube \ - && chown -R scion:scion /home/scion + && mkdir -p /home/scion/.kube /run/secrets \ + && chown -R scion:scion /home/scion /run/secrets COPY --from=go-builder /scion /usr/local/bin/scion +COPY scripts/cloudrun/entrypoint.sh /usr/local/bin/entrypoint.sh ENV HOME=/home/scion ENV KUBECONFIG=/home/scion/.kube/config @@ -63,7 +64,4 @@ WORKDIR /home/scion EXPOSE 8080 -ENTRYPOINT ["scion", "server", "start", \ - "--foreground", "--production", \ - "--enable-hub", "--enable-web", "--web-port", "8080", \ - "--auto-provide", "--global"] +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/scripts/cloudrun/deploy.sh b/scripts/cloudrun/deploy.sh index 6afe237e6..bb7dd44c2 100755 --- a/scripts/cloudrun/deploy.sh +++ b/scripts/cloudrun/deploy.sh @@ -154,7 +154,7 @@ gcloud run deploy "$SERVICE_NAME" \ --memory 1Gi \ --cpu 1 \ --timeout 3600 \ - --set-secrets 
"/home/scion/.kube/config=${SERVICE_NAME}-kubeconfig:latest,/home/scion/.scion/settings.yaml=${SERVICE_NAME}-settings:latest" \ + --set-secrets "/home/scion/.kube/config=${SERVICE_NAME}-kubeconfig:latest,/run/secrets/settings.yaml=${SERVICE_NAME}-settings:latest" \ --set-env-vars "HOME=/home/scion,KUBECONFIG=/home/scion/.kube/config" # ── 9. Print service URL ──────────────────────────────────────────────────── diff --git a/scripts/cloudrun/entrypoint.sh b/scripts/cloudrun/entrypoint.sh new file mode 100755 index 000000000..5ba111f38 --- /dev/null +++ b/scripts/cloudrun/entrypoint.sh @@ -0,0 +1,13 @@ +#!/bin/sh +set -e +# Copy secret-mounted settings into ~/.scion/ so the runtime discovery finds them. +# Cloud Run secret volumes use symlink-based atomic updates, so cp may fail. +# Use cat to read through the symlink safely. +mkdir -p "$HOME/.scion/storage" "$HOME/.scion/templates" +if [ -f /run/secrets/settings.yaml ]; then + cat /run/secrets/settings.yaml > "$HOME/.scion/settings.yaml" +fi +exec scion server start \ + --foreground --production --dev-auth \ + --enable-hub --enable-runtime-broker --enable-web --web-port 8080 \ + --auto-provide --global diff --git a/scripts/cloudrun/hub-settings-template.yaml b/scripts/cloudrun/hub-settings-template.yaml index 65717ec8a..759893f3a 100644 --- a/scripts/cloudrun/hub-settings-template.yaml +++ b/scripts/cloudrun/hub-settings-template.yaml @@ -1,4 +1,5 @@ schema_version: "1" +active_profile: default server: database: driver: sqlite From 66ddb9380db1213133dbaf8d3890278405e12ad3 Mon Sep 17 00:00:00 2001 From: Scion Date: Thu, 4 Jun 2026 04:45:55 +0000 Subject: [PATCH 5/6] feat(sciontool): add Google OIDC identity token transport for Cloud Run auth When running on GCP (GKE, Cloud Run, GCE), automatically fetches an OIDC identity token from the metadata server and adds it as Authorization: Bearer on all hub requests. 
Cloud Run's --no-allow-unauthenticated requires this as the outer auth layer; the inner hub auth continues to use the existing X-Scion-Agent-Token custom header (no conflict). - oidc.go: oidcTokenSource (cached, 5-min refresh margin), oidcTransport (http.RoundTripper wrapper), maybeConfigureOIDC() called from both NewClient() and NewClientWithConfig() - Audience defaults to hub URL, overridable via SCION_HUB_OIDC_AUDIENCE - isOnGCPFunc injectable for tests; graceful degradation if metadata unavailable - 10 new tests in oidc_test.go; all 40 pkg tests pass --- pkg/sciontool/hub/client.go | 8 +- pkg/sciontool/hub/oidc.go | 157 +++++++++++++++++ pkg/sciontool/hub/oidc_test.go | 311 +++++++++++++++++++++++++++++++++ 3 files changed, 474 insertions(+), 2 deletions(-) create mode 100644 pkg/sciontool/hub/oidc.go create mode 100644 pkg/sciontool/hub/oidc_test.go diff --git a/pkg/sciontool/hub/client.go b/pkg/sciontool/hub/client.go index 20834adc5..bc07143fc 100644 --- a/pkg/sciontool/hub/client.go +++ b/pkg/sciontool/hub/client.go @@ -175,7 +175,7 @@ func NewClient() *Client { return nil } - return &Client{ + c := &Client{ hubURL: hubURL, token: token, agentID: agentID, @@ -186,11 +186,13 @@ func NewClient() *Client { Timeout: DefaultTimeout, }, } + c.maybeConfigureOIDC() + return c } // NewClientWithConfig creates a new Hub client with explicit configuration. func NewClientWithConfig(hubURL, token, agentID string) *Client { - return &Client{ + c := &Client{ hubURL: hubURL, token: token, agentID: agentID, @@ -201,6 +203,8 @@ func NewClientWithConfig(hubURL, token, agentID string) *Client { Timeout: DefaultTimeout, }, } + c.maybeConfigureOIDC() + return c } // IsConfigured returns true if the client is properly configured. 
diff --git a/pkg/sciontool/hub/oidc.go b/pkg/sciontool/hub/oidc.go
new file mode 100644
index 000000000..4ec51ccb3
--- /dev/null
+++ b/pkg/sciontool/hub/oidc.go
@@ -0,0 +1,177 @@
+// Copyright 2026 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package hub
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"sync"
+	"time"
+
+	"cloud.google.com/go/compute/metadata"
+
+	"github.com/GoogleCloudPlatform/scion/pkg/sciontool/log"
+)
+
+const (
+	// EnvHubOIDCAudience overrides the audience claim in the OIDC identity token.
+	EnvHubOIDCAudience = "SCION_HUB_OIDC_AUDIENCE"
+
+	gcpMetadataBaseURL = "http://metadata.google.internal"
+
+	oidcRefreshMargin = 5 * time.Minute
+	oidcDefaultTTL    = 1 * time.Hour
+	oidcFetchTimeout  = 2 * time.Second
+)
+
+// isOnGCPFunc detects whether we're running on GCP. Override in tests.
+var isOnGCPFunc = func() bool { return metadata.OnGCE() }
+
+// oidcTokenSource fetches and caches Google OIDC identity tokens from the
+// GCE metadata server.
+type oidcTokenSource struct {
+	audience        string
+	metadataBaseURL string
+	httpClient      *http.Client
+
+	mu        sync.RWMutex
+	token     string
+	expiresAt time.Time
+}
+
+// getToken returns the cached identity token while it is still more than
+// oidcRefreshMargin away from expiry; otherwise it fetches a fresh token from
+// the metadata server, caches it, and returns it. All state is guarded by mu.
+func (s *oidcTokenSource) getToken() (string, error) {
+	s.mu.RLock()
+	if s.token != "" && time.Now().Before(s.expiresAt.Add(-oidcRefreshMargin)) {
+		tok := s.token
+		s.mu.RUnlock()
+		return tok, nil
+	}
+	s.mu.RUnlock()
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	// Double-check after acquiring write lock.
+	if s.token != "" && time.Now().Before(s.expiresAt.Add(-oidcRefreshMargin)) {
+		return s.token, nil
+	}
+
+	// The audience is typically a URL, so it must be query-escaped; otherwise
+	// characters such as '&', '#' or '+' would corrupt the metadata request.
+	endpoint := fmt.Sprintf("%s/computeMetadata/v1/instance/service-accounts/default/identity?audience=%s&format=full",
+		s.metadataBaseURL, url.QueryEscape(s.audience))
+
+	req, err := http.NewRequest("GET", endpoint, nil)
+	if err != nil {
+		return "", fmt.Errorf("oidc: build request: %w", err)
+	}
+	req.Header.Set("Metadata-Flavor", "Google")
+
+	resp, err := s.httpClient.Do(req)
+	if err != nil {
+		return "", fmt.Errorf("oidc: metadata fetch: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("oidc: metadata server returned %d", resp.StatusCode)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("oidc: read response: %w", err)
+	}
+
+	// Trim stray whitespace (a newline is not valid in a header value) and
+	// refuse an empty body rather than caching and sending "Bearer ".
+	tok := strings.TrimSpace(string(body))
+	if tok == "" {
+		return "", fmt.Errorf("oidc: metadata server returned an empty token")
+	}
+
+	// Fall back to a default lifetime when the expiry cannot be parsed.
+	expiry, err := ParseTokenExpiry(tok)
+	if err != nil {
+		expiry = time.Now().Add(oidcDefaultTTL)
+	}
+
+	s.token = tok
+	s.expiresAt = expiry
+	return tok, nil
+}
+
+// oidcTransport is an http.RoundTripper that injects a Google OIDC identity
+// token as an Authorization header on outgoing requests.
+type oidcTransport struct {
+	base   http.RoundTripper
+	source *oidcTokenSource
+}
+
+// RoundTrip adds "Authorization: Bearer <token>" to a clone of req when the
+// request has no Authorization header. If the token cannot be fetched the
+// request is forwarded unchanged.
+func (t *oidcTransport) RoundTrip(req *http.Request) (*http.Response, error) {
+	if req.Header.Get("Authorization") == "" {
+		tok, err := t.source.getToken()
+		if err != nil {
+			log.Debug("OIDC token fetch failed, skipping Authorization header: %v", err)
+		} else {
+			// Clone so the caller's request is not mutated.
+			req = req.Clone(req.Context())
+			req.Header.Set("Authorization", "Bearer "+tok)
+		}
+	}
+	return t.base.RoundTrip(req)
+}
+
+// newOIDCTransport wraps base (http.DefaultTransport when nil) with a token
+// source for the given audience and metadata server base URL.
+func newOIDCTransport(base http.RoundTripper, audience, metadataBaseURL string) *oidcTransport {
+	if base == nil {
+		base = http.DefaultTransport
+	}
+	return &oidcTransport{
+		base: base,
+		source: &oidcTokenSource{
+			audience:        audience,
+			metadataBaseURL: metadataBaseURL,
+			httpClient:      &http.Client{Timeout: oidcFetchTimeout},
+		},
+	}
+}
+
+// maybeConfigureOIDC wraps the client's HTTP transport with an OIDC token
+// injector when running on GCP. This enables transparent authentication
+// against Cloud Run-hosted hubs.
+func (c *Client) maybeConfigureOIDC() {
+	if !isOnGCPFunc() {
+		return
+	}
+
+	audience := os.Getenv(EnvHubOIDCAudience)
+	if audience == "" {
+		audience = c.hubURL
+	}
+
+	c.client.Transport = newOIDCTransport(c.client.Transport, audience, gcpMetadataBaseURL)
+	log.Debug("Configured OIDC transport for Cloud Run auth (audience=%s)", audience)
+}
diff --git a/pkg/sciontool/hub/oidc_test.go b/pkg/sciontool/hub/oidc_test.go
new file mode 100644
index 000000000..3f84ee947
--- /dev/null
+++ b/pkg/sciontool/hub/oidc_test.go
@@ -0,0 +1,311 @@
+// Copyright 2026 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package hub + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "os" + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// makeTestJWT builds a minimal JWT with the given expiry for testing. +func makeTestJWT(exp time.Time) string { + header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"none","typ":"JWT"}`)) + payload, _ := json.Marshal(map[string]interface{}{"exp": exp.Unix(), "iss": "test"}) + payloadB64 := base64.RawURLEncoding.EncodeToString(payload) + sig := base64.RawURLEncoding.EncodeToString([]byte("fakesig")) + return fmt.Sprintf("%s.%s.%s", header, payloadB64, sig) +} + +func overrideGCPDetection(val bool) func() { + orig := isOnGCPFunc + isOnGCPFunc = func() bool { return val } + return func() { isOnGCPFunc = orig } +} + +func TestOIDCTokenSource_FetchAndCache(t *testing.T) { + var fetchCount atomic.Int32 + token := makeTestJWT(time.Now().Add(1 * time.Hour)) + + metaSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "Google", r.Header.Get("Metadata-Flavor")) + assert.Contains(t, r.URL.Query().Get("audience"), "https://hub.example.com") + assert.Equal(t, "full", r.URL.Query().Get("format")) + fetchCount.Add(1) + fmt.Fprint(w, token) + })) + defer metaSrv.Close() + + src := &oidcTokenSource{ + audience: "https://hub.example.com", + metadataBaseURL: metaSrv.URL, + httpClient: &http.Client{Timeout: 2 * time.Second}, + } + + tok1, err := 
src.getToken() + require.NoError(t, err) + assert.Equal(t, token, tok1) + + tok2, err := src.getToken() + require.NoError(t, err) + assert.Equal(t, token, tok2) + + assert.Equal(t, int32(1), fetchCount.Load(), "second call should use cache") +} + +func TestOIDCTokenSource_RefreshExpired(t *testing.T) { + var fetchCount atomic.Int32 + token1 := makeTestJWT(time.Now().Add(1 * time.Hour)) + token2 := makeTestJWT(time.Now().Add(2 * time.Hour)) + + metaSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if fetchCount.Add(1) == 1 { + fmt.Fprint(w, token1) + } else { + fmt.Fprint(w, token2) + } + })) + defer metaSrv.Close() + + src := &oidcTokenSource{ + audience: "https://hub.example.com", + metadataBaseURL: metaSrv.URL, + httpClient: &http.Client{Timeout: 2 * time.Second}, + } + + tok, err := src.getToken() + require.NoError(t, err) + assert.Equal(t, token1, tok) + + // Simulate expiry by setting expiresAt to the past. + src.mu.Lock() + src.expiresAt = time.Now().Add(-1 * time.Minute) + src.mu.Unlock() + + tok, err = src.getToken() + require.NoError(t, err) + assert.Equal(t, token2, tok) + assert.Equal(t, int32(2), fetchCount.Load()) +} + +func TestOIDCTokenSource_RefreshWithinMargin(t *testing.T) { + var fetchCount atomic.Int32 + token1 := makeTestJWT(time.Now().Add(1 * time.Hour)) + token2 := makeTestJWT(time.Now().Add(2 * time.Hour)) + + metaSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if fetchCount.Add(1) == 1 { + fmt.Fprint(w, token1) + } else { + fmt.Fprint(w, token2) + } + })) + defer metaSrv.Close() + + src := &oidcTokenSource{ + audience: "https://hub.example.com", + metadataBaseURL: metaSrv.URL, + httpClient: &http.Client{Timeout: 2 * time.Second}, + } + + tok, err := src.getToken() + require.NoError(t, err) + assert.Equal(t, token1, tok) + + // Set expiry to 3 minutes from now (within 5-minute margin). 
+	src.mu.Lock()
+	src.expiresAt = time.Now().Add(3 * time.Minute)
+	src.mu.Unlock()
+
+	tok, err = src.getToken()
+	require.NoError(t, err)
+	assert.Equal(t, token2, tok, "should re-fetch when within refresh margin")
+}
+
+func TestOIDCTransport_InjectsHeader(t *testing.T) {
+	var receivedAuth string
+	hubSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		receivedAuth = r.Header.Get("Authorization")
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer hubSrv.Close()
+
+	token := makeTestJWT(time.Now().Add(1 * time.Hour))
+	metaSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		fmt.Fprint(w, token)
+	}))
+	defer metaSrv.Close()
+
+	transport := newOIDCTransport(http.DefaultTransport, "https://hub.example.com", metaSrv.URL)
+	client := &http.Client{Transport: transport}
+
+	req, _ := http.NewRequest("GET", hubSrv.URL+"/test", nil)
+	resp, err := client.Do(req)
+	require.NoError(t, err)
+	resp.Body.Close()
+
+	assert.Equal(t, "Bearer "+token, receivedAuth)
+}
+
+func TestOIDCTransport_DoesNotOverrideExistingAuth(t *testing.T) {
+	var receivedAuth string
+	hubSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		receivedAuth = r.Header.Get("Authorization")
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer hubSrv.Close()
+
+	metaSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		t.Error("metadata server should not be called when Authorization is already set") // not t.Fatal: FailNow must run on the test goroutine, not a handler goroutine
+	}))
+	defer metaSrv.Close()
+
+	transport := newOIDCTransport(http.DefaultTransport, "https://hub.example.com", metaSrv.URL)
+	client := &http.Client{Transport: transport}
+
+	req, _ := http.NewRequest("GET", hubSrv.URL+"/test", nil)
+	req.Header.Set("Authorization", "Bearer existing-token")
+	resp, err := client.Do(req)
+	require.NoError(t, err)
+	resp.Body.Close()
+
+	assert.Equal(t, "Bearer existing-token", receivedAuth)
+}
+
+func
TestOIDCTransport_GracefulDegradation(t *testing.T) { + var requestReceived bool + hubSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requestReceived = true + assert.Empty(t, r.Header.Get("Authorization"), "no auth header when metadata fails") + w.WriteHeader(http.StatusOK) + })) + defer hubSrv.Close() + + // Point at an unreachable metadata server. + transport := newOIDCTransport(http.DefaultTransport, "https://hub.example.com", "http://127.0.0.1:1") + transport.source.httpClient.Timeout = 100 * time.Millisecond + client := &http.Client{Transport: transport} + + req, _ := http.NewRequest("GET", hubSrv.URL+"/test", nil) + resp, err := client.Do(req) + require.NoError(t, err) + resp.Body.Close() + + assert.True(t, requestReceived, "request should proceed even when metadata fetch fails") +} + +func TestMaybeConfigureOIDC_NotOnGCP(t *testing.T) { + cleanup := overrideGCPDetection(false) + defer cleanup() + + c := &Client{ + hubURL: "https://hub.example.com", + client: &http.Client{Timeout: DefaultTimeout}, + } + + c.maybeConfigureOIDC() + + assert.Nil(t, c.client.Transport, "transport should not be wrapped when not on GCP") +} + +func TestMaybeConfigureOIDC_OnGCP(t *testing.T) { + cleanup := overrideGCPDetection(true) + defer cleanup() + + c := &Client{ + hubURL: "https://hub.example.com", + client: &http.Client{Timeout: DefaultTimeout}, + } + + c.maybeConfigureOIDC() + + require.NotNil(t, c.client.Transport) + ot, ok := c.client.Transport.(*oidcTransport) + require.True(t, ok, "transport should be oidcTransport") + assert.Equal(t, "https://hub.example.com", ot.source.audience) +} + +func TestMaybeConfigureOIDC_AudienceOverride(t *testing.T) { + cleanup := overrideGCPDetection(true) + defer cleanup() + + origAud := os.Getenv(EnvHubOIDCAudience) + os.Setenv(EnvHubOIDCAudience, "https://custom-audience.example.com") + defer os.Setenv(EnvHubOIDCAudience, origAud) + + c := &Client{ + hubURL: "https://hub.example.com", + client: 
&http.Client{Timeout: DefaultTimeout}, + } + + c.maybeConfigureOIDC() + + require.NotNil(t, c.client.Transport) + ot := c.client.Transport.(*oidcTransport) + assert.Equal(t, "https://custom-audience.example.com", ot.source.audience) +} + +func TestOIDC_EndToEnd_BothHeaders(t *testing.T) { + cleanup := overrideGCPDetection(true) + defer cleanup() + + token := makeTestJWT(time.Now().Add(1 * time.Hour)) + metaSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, token) + })) + defer metaSrv.Close() + + var gotAuth, gotAgentToken string + hubSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotAuth = r.Header.Get("Authorization") + gotAgentToken = r.Header.Get("X-Scion-Agent-Token") + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(map[string]string{"status": "ok"}) + })) + defer hubSrv.Close() + + // Override GCP metadata URL by directly constructing the client with OIDC transport. + c := &Client{ + hubURL: hubSrv.URL, + token: "test-agent-token", + agentID: "test-agent-123", + maxRetries: 1, + retryBaseDelay: 10 * time.Millisecond, + retryMaxDelay: 10 * time.Millisecond, + client: &http.Client{ + Timeout: DefaultTimeout, + }, + } + c.client.Transport = newOIDCTransport(c.client.Transport, hubSrv.URL, metaSrv.URL) + + err := c.UpdateStatus(context.Background(), StatusUpdate{ + Status: "running", + Message: "test", + }) + require.NoError(t, err) + + assert.Equal(t, "Bearer "+token, gotAuth, "OIDC Authorization header should be set") + assert.Equal(t, "test-agent-token", gotAgentToken, "X-Scion-Agent-Token should still be set") +} From 66b04f67d6a34a16cee82eff00978221c44226a0 Mon Sep 17 00:00:00 2001 From: Scion Date: Thu, 4 Jun 2026 11:15:56 +0000 Subject: [PATCH 6/6] fix(cloudrun): add image_registry to hub settings template Required for the broker to know where to pull agent container images on GKE. Found during E2E test. 
--- scripts/cloudrun/hub-settings-template.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/cloudrun/hub-settings-template.yaml b/scripts/cloudrun/hub-settings-template.yaml index 759893f3a..43152c98b 100644 --- a/scripts/cloudrun/hub-settings-template.yaml +++ b/scripts/cloudrun/hub-settings-template.yaml @@ -1,4 +1,5 @@ schema_version: "1" +image_registry: "us-central1-docker.pkg.dev/deploy-demo-test/public-docker" active_profile: default server: database: