From ae020bfd271ad0f40517e570500a256e9e121ad2 Mon Sep 17 00:00:00 2001 From: Luther Monson Date: Thu, 21 May 2026 22:15:00 -0700 Subject: [PATCH 1/4] feat(dind): gate docker run --privileged / --cap-add behind config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds [dind] allow_privileged to gate the elevation stack a sibling container can request via the fake docker daemon (Privileged=true, all caps, all devices, seccomp/apparmor off, writable sysfs/cgroupfs). Requests carrying HostConfig.Privileged=true or HostConfig.CapAdd are short-circuited with HTTP 403 when the gate is closed, before any image pull. Default policy is platform-aware: * Windows / macOS host → allowed. The dind backing containerd runs inside a managed VM (WSL2 / Hyper-V / Vz), so an escape stays inside that VM. KIND clusters and other workloads that need real privileged continue to work without config changes. * Linux host → denied. ephemerd runs directly on the host with no VM fence; a privileged escape is bare-metal-host compromise. Operators that trust their workloads can opt in with `allow_privileged = true`. The TOML key uses a *bool so an empty config block is distinguishable from an explicit `allow_privileged = false`. Both runtime.Config and the per-job dind.Server gain an AllowPrivileged field; main.go threads cfg.Dind.ResolvedAllowPrivileged() through at both runtime.New sites. Also corrects the package doc comment that previously claimed dind never produces privileged containers — that was true once, isn't now. Local CI: lint and tests pass cross-compiled for GOOS=linux. `mage test` on Windows still hits the documented pkcs11/ocicrypt cgo build failure for pkg/dind on Windows hosts; GOOS=linux go test -run xxx compiles the package clean. --- cmd/ephemerd/main.go | 86 +++++++++++------------ pkg/config/config.go | 97 ++++++++++++++++++-------- pkg/config/config_test.go | 81 ++++++++++++++++++++++ pkg/dind/containers.go | 42 ++++++++---- pkg/dind/dind.go | 70 ++++++++++++------- pkg/dind/privileged_gate_test.go | 113 +++++++++++++++++++++++++++++++ pkg/runtime/runtime.go | 34 ++++++---- 7 files changed, 400 insertions(+), 123 deletions(-) create mode 100644 pkg/dind/privileged_gate_test.go diff --git a/cmd/ephemerd/main.go b/cmd/ephemerd/main.go index 8567d4e..fd07ce4 100644 --- a/cmd/ephemerd/main.go +++ b/cmd/ephemerd/main.go @@ -26,8 +26,8 @@ import ( "github.com/ephpm/ephemerd/pkg/providers/forgejo" "github.com/ephpm/ephemerd/pkg/providers/gitea" githubProv "github.com/ephpm/ephemerd/pkg/providers/github" - goproxy "github.com/ephpm/ephemerd/pkg/proxies/go" "github.com/ephpm/ephemerd/pkg/proxies" + goproxy "github.com/ephpm/ephemerd/pkg/proxies/go" "github.com/ephpm/ephemerd/pkg/runner" "github.com/ephpm/ephemerd/pkg/runtime" "github.com/ephpm/ephemerd/pkg/scheduler" @@ -271,17 +271,18 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP } rt, err := runtime.New(runtime.Config{ - Client: ctrdClient, - RunnerDir: rm.Dir(), - RunnerMount: rm.ContainerDir(), - LogDir: joinPath(configDir, "logs"), - DataDir: configDir, - DindEnabled: cfg.Dind.Enabled, - Network: net, - WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(), - WindowsCPUs: cfg.Runner.Windows.CPUCount(), - BuildKit: bk, - Log: log, + Client: ctrdClient, + RunnerDir: rm.Dir(), + RunnerMount: rm.ContainerDir(), + LogDir: joinPath(configDir, "logs"), + DataDir: configDir, + DindEnabled: cfg.Dind.Enabled, + DindAllowPrivileged: cfg.Dind.ResolvedAllowPrivileged(), + Network: net, + WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(), + WindowsCPUs: cfg.Runner.Windows.CPUCount(), + BuildKit: bk, + Log: log, }) if err != nil { return fmt.Errorf("creating runtime: %w", err) @@ -443,21 +444,22 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP containerDataDir = "/mnt/ephemerd" } rt, err := runtime.New(runtime.Config{ - Client: ctrdClient, - RunnerDir: rm.Dir(), - RunnerMount: rm.ContainerDir(), - DefaultImage: cfg.Runner.DefaultImage, - ImagesDir: joinPath(configDir, "images"), - LogDir: joinPath(configDir, "logs"), - DataDir: configDir, - ContainerDataDir: containerDataDir, - DindEnabled: cfg.Dind.Enabled, - CacheProxyEnv: cacheProxyEnvVars, - Network: net, - WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(), - WindowsCPUs: cfg.Runner.Windows.CPUCount(), - BuildKit: bk, - Log: log, + Client: ctrdClient, + RunnerDir: rm.Dir(), + RunnerMount: rm.ContainerDir(), + DefaultImage: cfg.Runner.DefaultImage, + ImagesDir: joinPath(configDir, "images"), + LogDir: joinPath(configDir, "logs"), + DataDir: configDir, + ContainerDataDir: containerDataDir, + DindEnabled: cfg.Dind.Enabled, + DindAllowPrivileged: cfg.Dind.ResolvedAllowPrivileged(), + CacheProxyEnv: cacheProxyEnvVars, + Network: net, + WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(), + WindowsCPUs: cfg.Runner.Windows.CPUCount(), + BuildKit: bk, + Log: log, }) if err != nil { return fmt.Errorf("creating runtime: %w", err) @@ -524,21 +526,21 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP // Start scheduler (ties CI provider jobs to container lifecycle) sched := scheduler.New(scheduler.Config{ - Runtime: rt, - Providers: activeProviders, - Artifacts: artifactExtractor, - LinuxDispatcher: linuxDispatcher, - DataDir: configDir, - MaxConcurrent: cfg.Runner.MaxConcurrent, - MaxMacOSVMs: cfg.VM.MacOS.MaxConcurrent, - Labels: cfg.Runner.ExtraLabels, - PollInterval: pollInterval(cfg), - WebhookPort: cfg.Webhook.Port, - WebhookSecret: cfg.Webhook.Secret, - TLSCert: cfg.Webhook.TLSCert, - TLSKey: cfg.Webhook.TLSKey, - Tunnel: tunnelProvider, - TunnelMaxRetries: cfg.Webhook.TunnelMaxRetries, + Runtime: rt, + Providers: activeProviders, + Artifacts: artifactExtractor, + LinuxDispatcher: linuxDispatcher, + DataDir: configDir, + MaxConcurrent: cfg.Runner.MaxConcurrent, + MaxMacOSVMs: cfg.VM.MacOS.MaxConcurrent, + Labels: cfg.Runner.ExtraLabels, + PollInterval: pollInterval(cfg), + WebhookPort: cfg.Webhook.Port, + WebhookSecret: cfg.Webhook.Secret, + TLSCert: cfg.Webhook.TLSCert, + TLSKey: cfg.Webhook.TLSKey, + Tunnel: tunnelProvider, + TunnelMaxRetries: cfg.Webhook.TunnelMaxRetries, JobTimeout: cfg.Runner.ParsedJobTimeout(), ShutdownTimeout: cfg.Runner.ParsedShutdownTimeout(), LogRetention: cfg.Log.LogRetentionDuration(), diff --git a/pkg/config/config.go b/pkg/config/config.go index 0a6e295..04a7ab0 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -15,20 +15,20 @@ import ( ) type Config struct { - GitHub GitHubConfig `toml:"github"` - Forgejo ForgejoConfig `toml:"forgejo"` - Gitea GiteaConfig `toml:"gitea"` - GitLab GitLabConfig `toml:"gitlab"` - Woodpecker WoodpeckerConfig `toml:"woodpecker"` - Webhook WebhookConfig `toml:"webhook"` - Containerd ContainerdConfig `toml:"containerd"` - Network NetworkConfig `toml:"network"` - VM VMConfig `toml:"vm"` + GitHub GitHubConfig `toml:"github"` + Forgejo ForgejoConfig `toml:"forgejo"` + Gitea GiteaConfig `toml:"gitea"` + GitLab GitLabConfig `toml:"gitlab"` + Woodpecker WoodpeckerConfig `toml:"woodpecker"` + Webhook WebhookConfig `toml:"webhook"` + Containerd ContainerdConfig `toml:"containerd"` + Network NetworkConfig `toml:"network"` + VM VMConfig `toml:"vm"` Dind DindConfig `toml:"dind"` ModuleProxy ModuleProxyConfig `toml:"module_proxy"` Runner RunnerConfig `toml:"runner"` - Metrics MetricsConfig `toml:"metrics"` - Log LogConfig `toml:"log"` + Metrics MetricsConfig `toml:"metrics"` + Log LogConfig `toml:"log"` } // Provider returns the name of the first configured forge provider. @@ -77,14 +77,14 @@ type MetricsConfig struct { // By default, ephemerd uses polling (tunnel = "none"). // Set tunnel = "localtunnel" or "ngrok" for instant webhook delivery. type WebhookConfig struct { - Secret string `toml:"secret"` // webhook HMAC secret (auto-generated if empty) - Port int `toml:"port"` // listen port for health endpoint (default 8080) - TLSCert string `toml:"tls_cert"` // TLS certificate path (direct TLS, no tunnel) - TLSKey string `toml:"tls_key"` // TLS private key path - Tunnel string `toml:"tunnel"` // "none" (default, polling), "localtunnel", or "ngrok" - TunnelURL string `toml:"tunnel_url"` // localtunnel: self-hosted server URL - NgrokAuthtoken string `toml:"ngrok_authtoken"` // ngrok auth token (or use NGROK_AUTHTOKEN env) - TunnelMaxRetries int `toml:"tunnel_max_retries"` // max consecutive reconnect failures before falling back to polling (default 5) + Secret string `toml:"secret"` // webhook HMAC secret (auto-generated if empty) + Port int `toml:"port"` // listen port for health endpoint (default 8080) + TLSCert string `toml:"tls_cert"` // TLS certificate path (direct TLS, no tunnel) + TLSKey string `toml:"tls_key"` // TLS private key path + Tunnel string `toml:"tunnel"` // "none" (default, polling), "localtunnel", or "ngrok" + TunnelURL string `toml:"tunnel_url"` // localtunnel: self-hosted server URL + NgrokAuthtoken string `toml:"ngrok_authtoken"` // ngrok auth token (or use NGROK_AUTHTOKEN env) + TunnelMaxRetries int `toml:"tunnel_max_retries"` // max consecutive reconnect failures before falling back to polling (default 5) } // NetworkConfig configures container networking. @@ -113,6 +113,45 @@ type DindConfig struct { // Set to 0 to disable eviction (only empty-namespace cleanup runs). // Default 168h (7 days). CacheMaxAge time.Duration `toml:"cache_max_age"` + + // AllowPrivileged controls whether `docker run --privileged` (or + // HostConfig.Privileged=true / HostConfig.CapAdd) from inside a job + // is honored. When true, a sibling container can request the full + // elevation stack (all caps, all devices, seccomp/apparmor off, + // writable sysfs/cgroupfs) — needed for KIND clusters, nested + // containerd, /dev/fuse-style mounts, etc. When false, such requests + // are rejected with HTTP 403. + // + // SECURITY: a privileged sibling container is effectively root on + // whatever host runs the containerd that backs dind. On Windows and + // macOS hosts that backing containerd lives inside a managed Linux + // VM (WSL2 / Hyper-V / Vz), so an escape only reaches the VM. On a + // Linux host with no VM fence, an escape reaches the bare-metal host + // — set this to false unless every workload is trusted. + // + // Use the pointer form so an empty/missing TOML key is + // distinguishable from an explicit `allow_privileged = false`. See + // ResolvedAllowPrivileged for the default policy. + AllowPrivileged *bool `toml:"allow_privileged"` +} + +// ResolvedAllowPrivileged returns whether privileged dind sibling +// containers are allowed, applying the platform-aware default when the +// operator hasn't set the key explicitly. +// +// Default policy: +// - Windows / macOS host → true. The dind containerd backing store +// runs inside a VM that ephemerd manages (WSL2/Hyper-V on Windows, +// Vz on macOS), so the worst-case escape stays inside that VM. +// - Linux host → false. ephemerd runs directly on the host with no +// VM fence, so a privileged escape is bare-metal-host compromise. +// Operators that trust their workloads (e.g. internal CI for KIND +// tests) can opt in via `allow_privileged = true`. +func (d *DindConfig) ResolvedAllowPrivileged() bool { + if d.AllowPrivileged != nil { + return *d.AllowPrivileged + } + return goruntime.GOOS != "linux" } // DindCachePruneInterval returns the prune interval with the default @@ -148,9 +187,9 @@ type VMConfig struct { // CrossPlatform enables macOS and Windows VM support. Default true. // Set to false for platforms like Gitea/Forgejo that only support // Linux runners — this skips macOS image pulls and Windows VM setup. - CrossPlatform *bool `toml:"cross_platform"` - Linux LinuxVMToml `toml:"linux"` - MacOS MacOSVMToml `toml:"macos"` + CrossPlatform *bool `toml:"cross_platform"` + Linux LinuxVMToml `toml:"linux"` + MacOS MacOSVMToml `toml:"macos"` } // CrossPlatformEnabled returns whether macOS/Windows VM support is enabled. @@ -165,9 +204,9 @@ func (v *VMConfig) CrossPlatformEnabled() bool { // LinuxVMToml configures the long-running Linux VM for Linux jobs // on Windows (Hyper-V) and macOS (Virtualization.framework) hosts. type LinuxVMToml struct { - Enabled bool `toml:"enabled"` // enable Linux VM for cross-OS Linux jobs - CPUs uint `toml:"cpus"` // virtual CPUs (default: 2) - MemoryMB uint64 `toml:"memory_mb"` // memory in MB (default: 2048) + Enabled bool `toml:"enabled"` // enable Linux VM for cross-OS Linux jobs + CPUs uint `toml:"cpus"` // virtual CPUs (default: 2) + MemoryMB uint64 `toml:"memory_mb"` // memory in MB (default: 2048) DiskSizeGB uint64 `toml:"disk_size_gb"` // sparse disk size in GB (default: 50) } @@ -182,10 +221,10 @@ type MacOSVMToml struct { // Apple-signed IPSW on first boot and installs stock macOS into // /vm/macos/base.img. Distinct from the OCI base image // overlaid per job — that's fetched from the job's image label. - DiskImage string `toml:"disk_image"` - CPUs uint `toml:"cpus"` // CPUs per VM (default: 4) - MemoryMB uint64 `toml:"memory_mb"` // memory per VM in MB (default: 8192) - MaxConcurrent int `toml:"max_concurrent"` // max simultaneous macOS VMs (default: auto-detected from host CPUs) + DiskImage string `toml:"disk_image"` + CPUs uint `toml:"cpus"` // CPUs per VM (default: 4) + MemoryMB uint64 `toml:"memory_mb"` // memory per VM in MB (default: 8192) + MaxConcurrent int `toml:"max_concurrent"` // max simultaneous macOS VMs (default: auto-detected from host CPUs) } type GitHubConfig struct { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index b479dd4..8dad1a2 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -5,6 +5,7 @@ import ( "context" "os" "path/filepath" + goruntime "runtime" "slices" "testing" "time" @@ -1766,3 +1767,83 @@ func TestParsedPollInterval_Hours(t *testing.T) { t.Errorf("PollInterval(2h) = %v, want 2h", d) } } + +func TestResolvedAllowPrivileged_DefaultByOS(t *testing.T) { + d := DindConfig{} + got := d.ResolvedAllowPrivileged() + wantTrue := goruntime.GOOS != "linux" + if got != wantTrue { + t.Errorf("default ResolvedAllowPrivileged on GOOS=%s = %v, want %v", goruntime.GOOS, got, wantTrue) + } +} + +func TestResolvedAllowPrivileged_ExplicitTrueWins(t *testing.T) { + yes := true + d := DindConfig{AllowPrivileged: &yes} + if !d.ResolvedAllowPrivileged() { + t.Error("explicit allow_privileged=true should resolve true on every OS") + } +} + +func TestResolvedAllowPrivileged_ExplicitFalseWins(t *testing.T) { + no := false + d := DindConfig{AllowPrivileged: &no} + if d.ResolvedAllowPrivileged() { + t.Errorf("explicit allow_privileged=false should resolve false on every OS (GOOS=%s)", goruntime.GOOS) + } +} + +func TestLoad_DindAllowPrivileged_OmittedUsesPlatformDefault(t *testing.T) { + t.Setenv("GITHUB_TOKEN", "ghp_test123") + tmp := t.TempDir() + path := filepath.Join(tmp, "config.toml") + if err := os.WriteFile(path, []byte(` +[github] +owner = "testorg" + +[dind] +enabled = true +`), 0644); err != nil { + t.Fatal(err) + } + cfg, err := Load(path) + if err != nil { + t.Fatalf("Load() error: %v", err) + } + if cfg.Dind.AllowPrivileged != nil { + t.Errorf("AllowPrivileged ptr = %v, want nil (key not set in TOML)", *cfg.Dind.AllowPrivileged) + } + want := goruntime.GOOS != "linux" + if got := cfg.Dind.ResolvedAllowPrivileged(); got != want { + t.Errorf("ResolvedAllowPrivileged on GOOS=%s = %v, want %v", goruntime.GOOS, got, want) + } +} + +func TestLoad_DindAllowPrivileged_ExplicitFalseOnAnyOS(t *testing.T) { + t.Setenv("GITHUB_TOKEN", "ghp_test123") + tmp := t.TempDir() + path := filepath.Join(tmp, "config.toml") + if err := os.WriteFile(path, []byte(` +[github] +owner = "testorg" + +[dind] +enabled = true +allow_privileged = false +`), 0644); err != nil { + t.Fatal(err) + } + cfg, err := Load(path) + if err != nil { + t.Fatalf("Load() error: %v", err) + } + if cfg.Dind.AllowPrivileged == nil { + t.Fatal("AllowPrivileged ptr is nil; TOML did not bind the explicit false") + } + if *cfg.Dind.AllowPrivileged { + t.Errorf("AllowPrivileged = true, want false") + } + if cfg.Dind.ResolvedAllowPrivileged() { + t.Error("ResolvedAllowPrivileged() should honor explicit false even on non-Linux hosts") + } +} diff --git a/pkg/dind/containers.go b/pkg/dind/containers.go index db84e09..a9e5a44 100644 --- a/pkg/dind/containers.go +++ b/pkg/dind/containers.go @@ -111,17 +111,17 @@ type createRequest struct { } type hostConfig struct { - Binds []string `json:"Binds"` - NetworkMode string `json:"NetworkMode"` - Privileged bool `json:"Privileged"` - SecurityOpt []string `json:"SecurityOpt"` - CapAdd []string `json:"CapAdd"` - Tmpfs map[string]string `json:"Tmpfs"` - PortBindings map[string][]portBinding `json:"PortBindings"` - RestartPolicy *restartPolicy `json:"RestartPolicy"` - Init *bool `json:"Init"` - CgroupnsMode string `json:"CgroupnsMode"` - ExtraHosts []string `json:"ExtraHosts"` + Binds []string `json:"Binds"` + NetworkMode string `json:"NetworkMode"` + Privileged bool `json:"Privileged"` + SecurityOpt []string `json:"SecurityOpt"` + CapAdd []string `json:"CapAdd"` + Tmpfs map[string]string `json:"Tmpfs"` + PortBindings map[string][]portBinding `json:"PortBindings"` + RestartPolicy *restartPolicy `json:"RestartPolicy"` + Init *bool `json:"Init"` + CgroupnsMode string `json:"CgroupnsMode"` + ExtraHosts []string `json:"ExtraHosts"` } type portBinding struct { @@ -230,6 +230,25 @@ func (s *Server) handleContainerCreate(w http.ResponseWriter, r *http.Request) { return } + // Privileged-elevation gate. Reject before any image pull so a job + // can't burn bandwidth probing for a configuration we'll refuse. + if !s.allowPrivileged && req.HostConfig != nil { + if req.HostConfig.Privileged { + s.log.Warn("rejecting privileged container request", "image", req.Image, "reason", "dind.allow_privileged is false on this host") + writeJSON(w, http.StatusForbidden, map[string]string{ + "message": "privileged containers are disabled on this host (set dind.allow_privileged = true in ephemerd config to enable)", + }) + return + } + if len(req.HostConfig.CapAdd) > 0 { + s.log.Warn("rejecting --cap-add container request", "image", req.Image, "caps", req.HostConfig.CapAdd, "reason", "dind.allow_privileged is false on this host") + writeJSON(w, http.StatusForbidden, map[string]string{ + "message": fmt.Sprintf("--cap-add (%v) is disabled on this host (set dind.allow_privileged = true in ephemerd config to enable)", req.HostConfig.CapAdd), + }) + return + } + } + id := generateContainerID() name := strings.TrimPrefix(r.URL.Query().Get("name"), "/") @@ -1563,4 +1582,3 @@ func writeContainerHosts(entry *containerEntry) error { return os.WriteFile(entry.HostsPath, []byte(b.String()), 0o644) } - diff --git a/pkg/dind/dind.go b/pkg/dind/dind.go index aab80c7..5fb5e86 100644 --- a/pkg/dind/dind.go +++ b/pkg/dind/dind.go @@ -3,7 +3,16 @@ // Each job gets its own server instance listening on a Unix socket. // The socket is bind-mounted into the job container at /var/run/docker.sock. // Docker CLI calls inside the container are translated into containerd -// operations on the host — no real Docker daemon, no privileged containers. +// operations on the host — no real Docker daemon. +// +// Sibling containers created through this API can opt into the full +// docker --privileged elevation stack (all caps, all devices, seccomp +// and apparmor unconfined, writable sysfs/cgroupfs) when the host is +// configured to allow it. See Server.allowPrivileged and +// config.DindConfig.AllowPrivileged for the threat model — the short +// version is that on Windows and macOS hosts the dind containerd lives +// inside a managed Linux VM, so an escape stays in that VM; on a Linux +// host with no VM fence the default is to deny privileged requests. package dind import ( @@ -33,21 +42,22 @@ const sharedNamespace = "ephemerd" // Server is a per-job fake Docker daemon. type Server struct { - jobID string - jobNamespace string // per-job containerd namespace for isolation - cacheNamespace string // per-(provider,repo) shared image cache namespace; empty disables caching - sockPath string // host-side unix socket path (Linux/macOS only) - endpoint string // what the container should set DOCKER_HOST to (e.g. "tcp://gw:port" on Windows) - listener net.Listener - server *http.Server - client *client.Client - network *networking.Manager - buildkit *buildkit.Server // shared embedded BuildKit solver (nil → fall back to platform default) - runnerNetNS string // path to runner container's net namespace; used to install DNAT rules for port bindings - log *slog.Logger + jobID string + jobNamespace string // per-job containerd namespace for isolation + cacheNamespace string // per-(provider,repo) shared image cache namespace; empty disables caching + sockPath string // host-side unix socket path (Linux/macOS only) + endpoint string // what the container should set DOCKER_HOST to (e.g. "tcp://gw:port" on Windows) + listener net.Listener + server *http.Server + client *client.Client + network *networking.Manager + buildkit *buildkit.Server // shared embedded BuildKit solver (nil → fall back to platform default) + runnerNetNS string // path to runner container's net namespace; used to install DNAT rules for port bindings + allowPrivileged bool // gate for docker run --privileged / --cap-add; see config.DindConfig.AllowPrivileged + log *slog.Logger mu sync.Mutex - images map[string]*imageEntry // in-memory image store scoped to this job + images map[string]*imageEntry // in-memory image store scoped to this job containers map[string]*containerEntry // containers created through this socket execs map[string]*execEntry // exec processes inside containers networks map[string]*networkEntry // Docker networks (logical, in-memory) @@ -100,6 +110,13 @@ type Config struct { // the sibling's container IP. Empty disables port forwarding. RunnerNetNS string + // AllowPrivileged controls whether sibling containers may opt into + // the full elevation stack via HostConfig.Privileged or via + // HostConfig.CapAdd. When false, requests carrying either are + // rejected with HTTP 403. See config.DindConfig.AllowPrivileged for + // the threat model. + AllowPrivileged bool + Log *slog.Logger } @@ -122,18 +139,19 @@ func New(cfg Config) (*Server, error) { jobID: cfg.JobID, // containerd namespace name regex (^[A-Za-z0-9]+(?:[._-](?:[A-Za-z0-9]+))*$) // rejects slashes. Use hyphens to namespace per-job dind state. - jobNamespace: "ephemerd-dind-" + cfg.JobID, - cacheNamespace: CacheNamespace(cfg.Provider, cfg.Repo), - sockPath: sockPath, - client: cfg.Client, - network: cfg.Network, - buildkit: cfg.BuildKit, - runnerNetNS: cfg.RunnerNetNS, - log: cfg.Log.With("component", "dind", "job_id", cfg.JobID), - images: make(map[string]*imageEntry), - containers: make(map[string]*containerEntry), - execs: make(map[string]*execEntry), - networks: make(map[string]*networkEntry), + jobNamespace: "ephemerd-dind-" + cfg.JobID, + cacheNamespace: CacheNamespace(cfg.Provider, cfg.Repo), + sockPath: sockPath, + client: cfg.Client, + network: cfg.Network, + buildkit: cfg.BuildKit, + runnerNetNS: cfg.RunnerNetNS, + allowPrivileged: cfg.AllowPrivileged, + log: cfg.Log.With("component", "dind", "job_id", cfg.JobID), + images: make(map[string]*imageEntry), + containers: make(map[string]*containerEntry), + execs: make(map[string]*execEntry), + networks: make(map[string]*networkEntry), } s.initDefaultBridgeNetwork() return s, nil diff --git a/pkg/dind/privileged_gate_test.go b/pkg/dind/privileged_gate_test.go new file mode 100644 index 0000000..8ee3c51 --- /dev/null +++ b/pkg/dind/privileged_gate_test.go @@ -0,0 +1,113 @@ +package dind + +import ( + "bytes" + "encoding/json" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "testing" + + "github.com/containerd/containerd/v2/client" +) + +// gateTestServer returns a Server in the minimum state required to exercise +// handleContainerCreate's pre-pull validation path. The handler bails out +// before touching the client when the image is invalid or the privileged +// gate rejects the request, so a zero-value *client.Client is enough — we +// just need it non-nil to pass the early nil-check. +func gateTestServer(allow bool) *Server { + return &Server{ + client: &client.Client{}, + allowPrivileged: allow, + log: slog.New(slog.NewTextHandler(io.Discard, nil)), + containers: map[string]*containerEntry{}, + images: map[string]*imageEntry{}, + } +} + +func postCreate(t *testing.T, s *Server, body createRequest) *httptest.ResponseRecorder { + t.Helper() + raw, err := json.Marshal(body) + if err != nil { + t.Fatalf("marshal: %v", err) + } + req := httptest.NewRequest(http.MethodPost, "/containers/create", bytes.NewReader(raw)) + w := httptest.NewRecorder() + s.handleContainerCreate(w, req) + return w +} + +func TestHandleContainerCreate_PrivilegedDeniedWhenGateClosed(t *testing.T) { + s := gateTestServer(false) + body := createRequest{ + Image: "alpine:3.20", + HostConfig: &hostConfig{Privileged: true}, + } + w := postCreate(t, s, body) + if w.Code != http.StatusForbidden { + t.Fatalf("status = %d, want 403; body=%s", w.Code, w.Body.String()) + } + if !bytes.Contains(w.Body.Bytes(), []byte("privileged containers are disabled")) { + t.Errorf("body did not mention privileged disabled: %s", w.Body.String()) + } +} + +func TestHandleContainerCreate_CapAddDeniedWhenGateClosed(t *testing.T) { + s := gateTestServer(false) + body := createRequest{ + Image: "alpine:3.20", + HostConfig: &hostConfig{CapAdd: []string{"SYS_ADMIN"}}, + } + w := postCreate(t, s, body) + if w.Code != http.StatusForbidden { + t.Fatalf("status = %d, want 403; body=%s", w.Code, w.Body.String()) + } + if !bytes.Contains(w.Body.Bytes(), []byte("SYS_ADMIN")) { + t.Errorf("body should echo requested cap: %s", w.Body.String()) + } +} + +func TestHandleContainerCreate_NonPrivilegedRequestNotGated(t *testing.T) { + // With the gate closed but no Privileged/CapAdd in the request, the + // gate must let the request through. It will subsequently fail on + // image lookup (because the zero-value client has no real backend) + // — we just need to confirm we did NOT get the 403 short-circuit. + s := gateTestServer(false) + body := createRequest{ + Image: "alpine:3.20", + HostConfig: &hostConfig{Privileged: false}, + } + w := postCreate(t, s, body) + if w.Code == http.StatusForbidden { + t.Errorf("non-privileged request was 403'd: %s", w.Body.String()) + } +} + +func TestHandleContainerCreate_NilHostConfigNotGated(t *testing.T) { + // docker run without --privileged sends a HostConfig with zero + // values, but a hand-crafted client could omit HostConfig entirely. + // The gate must not deref a nil HostConfig. + s := gateTestServer(false) + body := createRequest{Image: "alpine:3.20"} + w := postCreate(t, s, body) + if w.Code == http.StatusForbidden { + t.Errorf("nil-HostConfig request was 403'd: %s", w.Body.String()) + } +} + +func TestHandleContainerCreate_PrivilegedAllowedWhenGateOpen(t *testing.T) { + // Mirror image: gate=true must NOT 403, even with Privileged=true. + // (The handler will still fail later on image lookup against the + // zero-value client, but not with 403.) + s := gateTestServer(true) + body := createRequest{ + Image: "alpine:3.20", + HostConfig: &hostConfig{Privileged: true, CapAdd: []string{"SYS_ADMIN"}}, + } + w := postCreate(t, s, body) + if w.Code == http.StatusForbidden { + t.Errorf("gate=true rejected privileged request: %s", w.Body.String()) + } +} diff --git a/pkg/runtime/runtime.go b/pkg/runtime/runtime.go index bac7b2c..20126bb 100644 --- a/pkg/runtime/runtime.go +++ b/pkg/runtime/runtime.go @@ -10,9 +10,9 @@ import ( "os" "os/exec" "path/filepath" + goruntime "runtime" "strings" "sync" - goruntime "runtime" "github.com/containerd/containerd/v2/client" "github.com/containerd/containerd/v2/core/containers" @@ -65,9 +65,14 @@ type Config struct { // DataDir must be rewritten to that VM-side path. When empty, falls // back to DataDir. ContainerDataDir string - DindEnabled bool // mount a fake Docker socket into each container - CacheProxyEnv []string // extra env vars from cache proxies (e.g., GOPROXY=...) - Network *networking.Manager + DindEnabled bool // mount a fake Docker socket into each container + // DindAllowPrivileged is forwarded to each per-job dind.Server. + // When false, requests carrying HostConfig.Privileged=true or + // HostConfig.CapAdd are rejected with HTTP 403. See + // config.DindConfig.AllowPrivileged for the threat model. + DindAllowPrivileged bool + CacheProxyEnv []string // extra env vars from cache proxies (e.g., GOPROXY=...) + Network *networking.Manager // WindowsMemoryBytes is the memory limit for Hyper-V isolated Windows // runner containers. Zero leaves the OCI spec field unset, which gives // the HCS default (~1 GB) — too small for MSVC builds. Caller should @@ -101,8 +106,8 @@ func (r *Runtime) Client() *client.Client { // RunnerEnv represents a running runner environment. type RunnerEnv struct { ID string - Netns string // network namespace path (Linux only) - RunnerDir string // per-job runner copy, cleaned up on destroy + Netns string // network namespace path (Linux only) + RunnerDir string // per-job runner copy, cleaned up on destroy Dind *dind.Server // per-job fake Docker daemon (nil if disabled) Container client.Container Task client.Task @@ -691,14 +696,15 @@ func (r *Runtime) Create(ctx context.Context, cfg CreateConfig) (*RunnerEnv, err if r.cfg.DindEnabled { var err error dindServer, err = dind.New(dind.Config{ - JobID: id, - Provider: cfg.Provider, - Repo: cfg.Repo, - DataDir: r.cfg.DataDir, - Client: r.client, - Network: r.cfg.Network, - BuildKit: r.cfg.BuildKit, - Log: r.cfg.Log, + JobID: id, + Provider: cfg.Provider, + Repo: cfg.Repo, + DataDir: r.cfg.DataDir, + Client: r.client, + Network: r.cfg.Network, + BuildKit: r.cfg.BuildKit, + AllowPrivileged: r.cfg.DindAllowPrivileged, + Log: r.cfg.Log, }) if err != nil { return nil, fmt.Errorf("creating dind server for %s: %w", id, err) From 4cf6aff65ee4f0a7d27bbef4385896da224d3a28 Mon Sep 17 00:00:00 2001 From: Luther Monson Date: Thu, 21 May 2026 22:45:00 -0700 Subject: [PATCH 2/4] test(dind): extract gate to pure function; drop nil-client panic in tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous tests built a Server with `&client.Client{}` (zero-value) to satisfy the early nil-check on handleContainerCreate's client. That works for the 403 short-circuit cases — the gate fires before the client is touched — but the "not gated" tests proceed past the gate and call GetImage on the zero-value client, which has no gRPC ClientConn and panics. CI caught this in TestHandleContainerCreate_NonPrivilegedRequestNotGated. Extract the gate decision into a pure function checkPrivilegedGate that takes the flag and the parsed HostConfig and returns the rejection message + blocked flag. The handler becomes a one-liner; the tests for the "not gated" cases call the pure function directly and don't need a Server or client at all. The 403 tests still exercise the full handler since that path is self-contained. --- pkg/dind/containers.go | 37 ++++++---- pkg/dind/privileged_gate_test.go | 118 ++++++++++++++++++------------- 2 files changed, 92 insertions(+), 63 deletions(-) diff --git a/pkg/dind/containers.go b/pkg/dind/containers.go index a9e5a44..e10896b 100644 --- a/pkg/dind/containers.go +++ b/pkg/dind/containers.go @@ -207,6 +207,24 @@ func (s *Server) resolveContainerID(nameOrID string) string { return nameOrID } +// checkPrivilegedGate returns a user-facing rejection message and blocked=true +// when the request asks for elevation (Privileged=true or CapAdd) but the gate +// is closed (allowPrivileged=false). Otherwise blocked=false and msg is empty. +// Pure function so the handler stays simple and tests don't need a containerd +// client to exercise the gate logic. +func checkPrivilegedGate(allowPrivileged bool, hc *hostConfig) (msg string, blocked bool) { + if allowPrivileged || hc == nil { + return "", false + } + if hc.Privileged { + return "privileged containers are disabled on this host (set dind.allow_privileged = true in ephemerd config to enable)", true + } + if len(hc.CapAdd) > 0 { + return fmt.Sprintf("--cap-add (%v) is disabled on this host (set dind.allow_privileged = true in ephemerd config to enable)", hc.CapAdd), true + } + return "", false +} + func (s *Server) handleContainerCreate(w http.ResponseWriter, r *http.Request) { if s.client == nil { writeJSON(w, http.StatusInternalServerError, map[string]string{ @@ -232,21 +250,10 @@ func (s *Server) handleContainerCreate(w http.ResponseWriter, r *http.Request) { // Privileged-elevation gate. Reject before any image pull so a job // can't burn bandwidth probing for a configuration we'll refuse. - if !s.allowPrivileged && req.HostConfig != nil { - if req.HostConfig.Privileged { - s.log.Warn("rejecting privileged container request", "image", req.Image, "reason", "dind.allow_privileged is false on this host") - writeJSON(w, http.StatusForbidden, map[string]string{ - "message": "privileged containers are disabled on this host (set dind.allow_privileged = true in ephemerd config to enable)", - }) - return - } - if len(req.HostConfig.CapAdd) > 0 { - s.log.Warn("rejecting --cap-add container request", "image", req.Image, "caps", req.HostConfig.CapAdd, "reason", "dind.allow_privileged is false on this host") - writeJSON(w, http.StatusForbidden, map[string]string{ - "message": fmt.Sprintf("--cap-add (%v) is disabled on this host (set dind.allow_privileged = true in ephemerd config to enable)", req.HostConfig.CapAdd), - }) - return - } + if msg, blocked := checkPrivilegedGate(s.allowPrivileged, req.HostConfig); blocked { + s.log.Warn("rejecting elevated container request", "image", req.Image, "reason", msg) + writeJSON(w, http.StatusForbidden, map[string]string{"message": msg}) + return } id := generateContainerID() diff --git a/pkg/dind/privileged_gate_test.go b/pkg/dind/privileged_gate_test.go index 8ee3c51..c6d034f 100644 --- a/pkg/dind/privileged_gate_test.go +++ b/pkg/dind/privileged_gate_test.go @@ -7,23 +7,18 @@ import ( "log/slog" "net/http" "net/http/httptest" + "strings" "testing" - - "github.com/containerd/containerd/v2/client" ) -// gateTestServer returns a Server in the minimum state required to exercise -// handleContainerCreate's pre-pull validation path. The handler bails out -// before touching the client when the image is invalid or the privileged -// gate rejects the request, so a zero-value *client.Client is enough — we -// just need it non-nil to pass the early nil-check. +// gateTestServer returns a Server with only the fields handleContainerCreate +// looks at on the 403 short-circuit path: a non-nil logger and the gate flag. +// The client stays nil so the handler returns 500 from the early nil-check — +// fine for the 403 tests since they should never reach that branch. func gateTestServer(allow bool) *Server { return &Server{ - client: &client.Client{}, allowPrivileged: allow, log: slog.New(slog.NewTextHandler(io.Discard, nil)), - containers: map[string]*containerEntry{}, - images: map[string]*imageEntry{}, } } @@ -39,13 +34,15 @@ func postCreate(t *testing.T, s *Server, body createRequest) *httptest.ResponseR return w } +// 403 short-circuit tests: these exercise the full handler path because the +// gate fires before the nil-client check. + func TestHandleContainerCreate_PrivilegedDeniedWhenGateClosed(t *testing.T) { s := gateTestServer(false) - body := createRequest{ + w := postCreate(t, s, createRequest{ Image: "alpine:3.20", HostConfig: &hostConfig{Privileged: true}, - } - w := postCreate(t, s, body) + }) if w.Code != http.StatusForbidden { t.Fatalf("status = %d, want 403; body=%s", w.Code, w.Body.String()) } @@ -56,11 +53,10 @@ func TestHandleContainerCreate_PrivilegedDeniedWhenGateClosed(t *testing.T) { func TestHandleContainerCreate_CapAddDeniedWhenGateClosed(t *testing.T) { s := gateTestServer(false) - body := createRequest{ + w := postCreate(t, s, createRequest{ Image: "alpine:3.20", HostConfig: &hostConfig{CapAdd: []string{"SYS_ADMIN"}}, - } - w := postCreate(t, s, body) + }) if w.Code != http.StatusForbidden { t.Fatalf("status = %d, want 403; body=%s", w.Code, w.Body.String()) } @@ -69,45 +65,71 @@ func TestHandleContainerCreate_CapAddDeniedWhenGateClosed(t *testing.T) { } } -func TestHandleContainerCreate_NonPrivilegedRequestNotGated(t *testing.T) { - // With the gate closed but no Privileged/CapAdd in the request, the - // gate must let the request through. It will subsequently fail on - // image lookup (because the zero-value client has no real backend) - // — we just need to confirm we did NOT get the 403 short-circuit. - s := gateTestServer(false) - body := createRequest{ - Image: "alpine:3.20", - HostConfig: &hostConfig{Privileged: false}, +// Pure-function tests for the gate logic. These don't need a Server or a +// containerd client — important because the non-gated paths in +// handleContainerCreate proceed to GetImage which panics on a nil gRPC conn. + +func TestCheckPrivilegedGate_AllowedPassesEverything(t *testing.T) { + cases := []struct { + name string + hc *hostConfig + }{ + {"nil HostConfig", nil}, + {"empty HostConfig", &hostConfig{}}, + {"Privileged=true", &hostConfig{Privileged: true}}, + {"CapAdd=SYS_ADMIN", &hostConfig{CapAdd: []string{"SYS_ADMIN"}}}, + {"both", &hostConfig{Privileged: true, CapAdd: []string{"NET_ADMIN", "SYS_ADMIN"}}}, } - w := postCreate(t, s, body) - if w.Code == http.StatusForbidden { - t.Errorf("non-privileged request was 403'd: %s", w.Body.String()) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + msg, blocked := checkPrivilegedGate(true, tc.hc) + if blocked { + t.Errorf("blocked = true with gate open; msg=%q", msg) + } + if msg != "" { + t.Errorf("msg = %q, want empty when not blocked", msg) + } + }) } } -func TestHandleContainerCreate_NilHostConfigNotGated(t *testing.T) { - // docker run without --privileged sends a HostConfig with zero - // values, but a hand-crafted client could omit HostConfig entirely. - // The gate must not deref a nil HostConfig. - s := gateTestServer(false) - body := createRequest{Image: "alpine:3.20"} - w := postCreate(t, s, body) - if w.Code == http.StatusForbidden { - t.Errorf("nil-HostConfig request was 403'd: %s", w.Body.String()) +func TestCheckPrivilegedGate_ClosedRejectsPrivileged(t *testing.T) { + msg, blocked := checkPrivilegedGate(false, &hostConfig{Privileged: true}) + if !blocked { + t.Fatal("blocked = false, want true") + } + if !strings.Contains(msg, "privileged containers are disabled") { + t.Errorf("msg = %q, want it to mention 'privileged containers are disabled'", msg) } } -func TestHandleContainerCreate_PrivilegedAllowedWhenGateOpen(t *testing.T) { - // Mirror image: gate=true must NOT 403, even with Privileged=true. - // (The handler will still fail later on image lookup against the - // zero-value client, but not with 403.) - s := gateTestServer(true) - body := createRequest{ - Image: "alpine:3.20", - HostConfig: &hostConfig{Privileged: true, CapAdd: []string{"SYS_ADMIN"}}, +func TestCheckPrivilegedGate_ClosedRejectsCapAdd(t *testing.T) { + msg, blocked := checkPrivilegedGate(false, &hostConfig{CapAdd: []string{"SYS_ADMIN"}}) + if !blocked { + t.Fatal("blocked = false, want true") + } + if !strings.Contains(msg, "SYS_ADMIN") { + t.Errorf("msg = %q, want it to echo the requested cap", msg) + } +} + +func TestCheckPrivilegedGate_ClosedAllowsNilHostConfig(t *testing.T) { + // docker run without -H may omit HostConfig entirely; the gate must + // not deref nil. + msg, blocked := checkPrivilegedGate(false, nil) + if blocked { + t.Errorf("nil HostConfig blocked: msg=%q", msg) } - w := postCreate(t, s, body) - if w.Code == http.StatusForbidden { - t.Errorf("gate=true rejected privileged request: %s", w.Body.String()) +} + +func TestCheckPrivilegedGate_ClosedAllowsZeroHostConfig(t *testing.T) { + // The common case: Docker CLI always sends HostConfig, mostly with + // zero fields. The gate must let it through. + msg, blocked := checkPrivilegedGate(false, &hostConfig{ + Binds: []string{"/host:/c"}, + NetworkMode: "bridge", + }) + if blocked { + t.Errorf("non-elevated HostConfig blocked: msg=%q", msg) } } From 8ee6658e1f3503c5f6e1e53e423b071e23a1a1ac Mon Sep 17 00:00:00 2001 From: Luther Monson Date: Thu, 21 May 2026 23:00:00 -0700 Subject: [PATCH 3/4] fix(dind): run privileged gate before the nil-client check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reordering: gate is a request-shape validation, the nil-client check is a runtime-state check; check shape before state. CI caught this — the 403 tests construct a Server without a client (because the gate doesn't need one) and the original ordering returned 500 first. Also makes the operator-facing behavior more useful: a misconfigured deploy that lost its containerd client still rejects privileged requests with the actionable 403 rather than a generic 500. --- pkg/dind/containers.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pkg/dind/containers.go b/pkg/dind/containers.go index e10896b..bd47113 100644 --- a/pkg/dind/containers.go +++ b/pkg/dind/containers.go @@ -226,13 +226,6 @@ func checkPrivilegedGate(allowPrivileged bool, hc *hostConfig) (msg string, bloc } func (s *Server) handleContainerCreate(w http.ResponseWriter, r *http.Request) { - if s.client == nil { - writeJSON(w, http.StatusInternalServerError, map[string]string{ - "message": "containerd client not available", - }) - return - } - var req createRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeJSON(w, http.StatusBadRequest, map[string]string{ @@ -248,14 +241,22 @@ func (s *Server) handleContainerCreate(w http.ResponseWriter, r *http.Request) { return } - // Privileged-elevation gate. Reject before any image pull so a job - // can't burn bandwidth probing for a configuration we'll refuse. + // Privileged-elevation gate runs before the client check: it's a + // request-shape validation, not a runtime-state check. This also + // makes the unit tests trivial — they don't need a containerd client. if msg, blocked := checkPrivilegedGate(s.allowPrivileged, req.HostConfig); blocked { s.log.Warn("rejecting elevated container request", "image", req.Image, "reason", msg) writeJSON(w, http.StatusForbidden, map[string]string{"message": msg}) return } + if s.client == nil { + writeJSON(w, http.StatusInternalServerError, map[string]string{ + "message": "containerd client not available", + }) + return + } + id := generateContainerID() name := strings.TrimPrefix(r.URL.Query().Get("name"), "/") From 79c5110db1a394752915dedc408900637d9128e1 Mon Sep 17 00:00:00 2001 From: Luther Monson Date: Thu, 21 May 2026 23:10:00 -0700 Subject: [PATCH 4/4] test(dind): update TestContainerCreateNoImage to expect 400 not 500 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After moving the request-shape validation ahead of the runtime nil- client check (so the privileged gate runs without a client), a missing Image field correctly returns 400 from the new ordering instead of 500 from the old nil-client-first ordering. The test name was always "NoImage" — 400 is the right answer; the old 500 assertion was an implementation-detail artifact. --- pkg/dind/dind_test.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/dind/dind_test.go b/pkg/dind/dind_test.go index 6f13d90..067d9de 100644 --- a/pkg/dind/dind_test.go +++ b/pkg/dind/dind_test.go @@ -230,10 +230,12 @@ func TestContainerCreateNoImage(t *testing.T) { } }() - // No containerd client → 500 before image check. - // This validates that the request parsing works. - if resp.StatusCode != http.StatusInternalServerError { - t.Errorf("status = %d, want 500", resp.StatusCode) + // Missing Image field → 400 Bad Request from request validation. + // (Previously this asserted 500 because the nil-client check ran + // first; the handler now validates request shape before checking + // runtime state, so 400 is the user-facing result.) + if resp.StatusCode != http.StatusBadRequest { + t.Errorf("status = %d, want 400", resp.StatusCode) } }