diff --git a/cmd/ephemerd/main.go b/cmd/ephemerd/main.go index 8567d4e..fd07ce4 100644 --- a/cmd/ephemerd/main.go +++ b/cmd/ephemerd/main.go @@ -26,8 +26,8 @@ import ( "github.com/ephpm/ephemerd/pkg/providers/forgejo" "github.com/ephpm/ephemerd/pkg/providers/gitea" githubProv "github.com/ephpm/ephemerd/pkg/providers/github" - goproxy "github.com/ephpm/ephemerd/pkg/proxies/go" "github.com/ephpm/ephemerd/pkg/proxies" + goproxy "github.com/ephpm/ephemerd/pkg/proxies/go" "github.com/ephpm/ephemerd/pkg/runner" "github.com/ephpm/ephemerd/pkg/runtime" "github.com/ephpm/ephemerd/pkg/scheduler" @@ -271,17 +271,18 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP } rt, err := runtime.New(runtime.Config{ - Client: ctrdClient, - RunnerDir: rm.Dir(), - RunnerMount: rm.ContainerDir(), - LogDir: joinPath(configDir, "logs"), - DataDir: configDir, - DindEnabled: cfg.Dind.Enabled, - Network: net, - WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(), - WindowsCPUs: cfg.Runner.Windows.CPUCount(), - BuildKit: bk, - Log: log, + Client: ctrdClient, + RunnerDir: rm.Dir(), + RunnerMount: rm.ContainerDir(), + LogDir: joinPath(configDir, "logs"), + DataDir: configDir, + DindEnabled: cfg.Dind.Enabled, + DindAllowPrivileged: cfg.Dind.ResolvedAllowPrivileged(), + Network: net, + WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(), + WindowsCPUs: cfg.Runner.Windows.CPUCount(), + BuildKit: bk, + Log: log, }) if err != nil { return fmt.Errorf("creating runtime: %w", err) @@ -443,21 +444,22 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP containerDataDir = "/mnt/ephemerd" } rt, err := runtime.New(runtime.Config{ - Client: ctrdClient, - RunnerDir: rm.Dir(), - RunnerMount: rm.ContainerDir(), - DefaultImage: cfg.Runner.DefaultImage, - ImagesDir: joinPath(configDir, "images"), - LogDir: joinPath(configDir, "logs"), - DataDir: configDir, - ContainerDataDir: containerDataDir, - DindEnabled: cfg.Dind.Enabled, - CacheProxyEnv: cacheProxyEnvVars, - Network: net, - WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(), - WindowsCPUs: cfg.Runner.Windows.CPUCount(), - BuildKit: bk, - Log: log, + Client: ctrdClient, + RunnerDir: rm.Dir(), + RunnerMount: rm.ContainerDir(), + DefaultImage: cfg.Runner.DefaultImage, + ImagesDir: joinPath(configDir, "images"), + LogDir: joinPath(configDir, "logs"), + DataDir: configDir, + ContainerDataDir: containerDataDir, + DindEnabled: cfg.Dind.Enabled, + DindAllowPrivileged: cfg.Dind.ResolvedAllowPrivileged(), + CacheProxyEnv: cacheProxyEnvVars, + Network: net, + WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(), + WindowsCPUs: cfg.Runner.Windows.CPUCount(), + BuildKit: bk, + Log: log, }) if err != nil { return fmt.Errorf("creating runtime: %w", err) @@ -524,21 +526,21 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP // Start scheduler (ties CI provider jobs to container lifecycle) sched := scheduler.New(scheduler.Config{ - Runtime: rt, - Providers: activeProviders, - Artifacts: artifactExtractor, - LinuxDispatcher: linuxDispatcher, - DataDir: configDir, - MaxConcurrent: cfg.Runner.MaxConcurrent, - MaxMacOSVMs: cfg.VM.MacOS.MaxConcurrent, - Labels: cfg.Runner.ExtraLabels, - PollInterval: pollInterval(cfg), - WebhookPort: cfg.Webhook.Port, - WebhookSecret: cfg.Webhook.Secret, - TLSCert: cfg.Webhook.TLSCert, - TLSKey: cfg.Webhook.TLSKey, - Tunnel: tunnelProvider, - TunnelMaxRetries: cfg.Webhook.TunnelMaxRetries, + Runtime: rt, + Providers: activeProviders, + Artifacts: artifactExtractor, + LinuxDispatcher: linuxDispatcher, + DataDir: configDir, + MaxConcurrent: cfg.Runner.MaxConcurrent, + MaxMacOSVMs: cfg.VM.MacOS.MaxConcurrent, + Labels: cfg.Runner.ExtraLabels, + PollInterval: pollInterval(cfg), + WebhookPort: cfg.Webhook.Port, + WebhookSecret: cfg.Webhook.Secret, + TLSCert: cfg.Webhook.TLSCert, + TLSKey: cfg.Webhook.TLSKey, + Tunnel: tunnelProvider, + TunnelMaxRetries: cfg.Webhook.TunnelMaxRetries, JobTimeout: cfg.Runner.ParsedJobTimeout(), ShutdownTimeout: cfg.Runner.ParsedShutdownTimeout(), LogRetention: cfg.Log.LogRetentionDuration(), diff --git a/pkg/config/config.go b/pkg/config/config.go index 0a6e295..04a7ab0 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -15,20 +15,20 @@ import ( ) type Config struct { - GitHub GitHubConfig `toml:"github"` - Forgejo ForgejoConfig `toml:"forgejo"` - Gitea GiteaConfig `toml:"gitea"` - GitLab GitLabConfig `toml:"gitlab"` - Woodpecker WoodpeckerConfig `toml:"woodpecker"` - Webhook WebhookConfig `toml:"webhook"` - Containerd ContainerdConfig `toml:"containerd"` - Network NetworkConfig `toml:"network"` - VM VMConfig `toml:"vm"` + GitHub GitHubConfig `toml:"github"` + Forgejo ForgejoConfig `toml:"forgejo"` + Gitea GiteaConfig `toml:"gitea"` + GitLab GitLabConfig `toml:"gitlab"` + Woodpecker WoodpeckerConfig `toml:"woodpecker"` + Webhook WebhookConfig `toml:"webhook"` + Containerd ContainerdConfig `toml:"containerd"` + Network NetworkConfig `toml:"network"` + VM VMConfig `toml:"vm"` Dind DindConfig `toml:"dind"` ModuleProxy ModuleProxyConfig `toml:"module_proxy"` Runner RunnerConfig `toml:"runner"` - Metrics MetricsConfig `toml:"metrics"` - Log LogConfig `toml:"log"` + Metrics MetricsConfig `toml:"metrics"` + Log LogConfig `toml:"log"` } // Provider returns the name of the first configured forge provider. @@ -77,14 +77,14 @@ type MetricsConfig struct { // By default, ephemerd uses polling (tunnel = "none"). // Set tunnel = "localtunnel" or "ngrok" for instant webhook delivery. type WebhookConfig struct { - Secret string `toml:"secret"` // webhook HMAC secret (auto-generated if empty) - Port int `toml:"port"` // listen port for health endpoint (default 8080) - TLSCert string `toml:"tls_cert"` // TLS certificate path (direct TLS, no tunnel) - TLSKey string `toml:"tls_key"` // TLS private key path - Tunnel string `toml:"tunnel"` // "none" (default, polling), "localtunnel", or "ngrok" - TunnelURL string `toml:"tunnel_url"` // localtunnel: self-hosted server URL - NgrokAuthtoken string `toml:"ngrok_authtoken"` // ngrok auth token (or use NGROK_AUTHTOKEN env) - TunnelMaxRetries int `toml:"tunnel_max_retries"` // max consecutive reconnect failures before falling back to polling (default 5) + Secret string `toml:"secret"` // webhook HMAC secret (auto-generated if empty) + Port int `toml:"port"` // listen port for health endpoint (default 8080) + TLSCert string `toml:"tls_cert"` // TLS certificate path (direct TLS, no tunnel) + TLSKey string `toml:"tls_key"` // TLS private key path + Tunnel string `toml:"tunnel"` // "none" (default, polling), "localtunnel", or "ngrok" + TunnelURL string `toml:"tunnel_url"` // localtunnel: self-hosted server URL + NgrokAuthtoken string `toml:"ngrok_authtoken"` // ngrok auth token (or use NGROK_AUTHTOKEN env) + TunnelMaxRetries int `toml:"tunnel_max_retries"` // max consecutive reconnect failures before falling back to polling (default 5) } // NetworkConfig configures container networking. @@ -113,6 +113,45 @@ type DindConfig struct { // Set to 0 to disable eviction (only empty-namespace cleanup runs). // Default 168h (7 days). CacheMaxAge time.Duration `toml:"cache_max_age"` + + // AllowPrivileged controls whether `docker run --privileged` (or + // HostConfig.Privileged=true / HostConfig.CapAdd) from inside a job + // is honored. When true, a sibling container can request the full + // elevation stack (all caps, all devices, seccomp/apparmor off, + // writable sysfs/cgroupfs) — needed for KIND clusters, nested + // containerd, /dev/fuse-style mounts, etc. When false, such requests + // are rejected with HTTP 403. + // + // SECURITY: a privileged sibling container is effectively root on + // whatever host runs the containerd that backs dind. On Windows and + // macOS hosts that backing containerd lives inside a managed Linux + // VM (WSL2 / Hyper-V / Vz), so an escape only reaches the VM. On a + // Linux host with no VM fence, an escape reaches the bare-metal host + // — set this to false unless every workload is trusted. + // + // Use the pointer form so an empty/missing TOML key is + // distinguishable from an explicit `allow_privileged = false`. See + // ResolvedAllowPrivileged for the default policy. + AllowPrivileged *bool `toml:"allow_privileged"` +} + +// ResolvedAllowPrivileged returns whether privileged dind sibling +// containers are allowed, applying the platform-aware default when the +// operator hasn't set the key explicitly. +// +// Default policy: +// - Windows / macOS host → true. The dind containerd backing store +// runs inside a VM that ephemerd manages (WSL2/Hyper-V on Windows, +// Vz on macOS), so the worst-case escape stays inside that VM. +// - Linux host → false. ephemerd runs directly on the host with no +// VM fence, so a privileged escape is bare-metal-host compromise. +// Operators that trust their workloads (e.g. internal CI for KIND +// tests) can opt in via `allow_privileged = true`. +func (d *DindConfig) ResolvedAllowPrivileged() bool { + if d.AllowPrivileged != nil { + return *d.AllowPrivileged + } + return goruntime.GOOS != "linux" } // DindCachePruneInterval returns the prune interval with the default @@ -148,9 +187,9 @@ type VMConfig struct { // CrossPlatform enables macOS and Windows VM support. Default true. // Set to false for platforms like Gitea/Forgejo that only support // Linux runners — this skips macOS image pulls and Windows VM setup. - CrossPlatform *bool `toml:"cross_platform"` - Linux LinuxVMToml `toml:"linux"` - MacOS MacOSVMToml `toml:"macos"` + CrossPlatform *bool `toml:"cross_platform"` + Linux LinuxVMToml `toml:"linux"` + MacOS MacOSVMToml `toml:"macos"` } // CrossPlatformEnabled returns whether macOS/Windows VM support is enabled. @@ -165,9 +204,9 @@ func (v *VMConfig) CrossPlatformEnabled() bool { // LinuxVMToml configures the long-running Linux VM for Linux jobs // on Windows (Hyper-V) and macOS (Virtualization.framework) hosts. type LinuxVMToml struct { - Enabled bool `toml:"enabled"` // enable Linux VM for cross-OS Linux jobs - CPUs uint `toml:"cpus"` // virtual CPUs (default: 2) - MemoryMB uint64 `toml:"memory_mb"` // memory in MB (default: 2048) + Enabled bool `toml:"enabled"` // enable Linux VM for cross-OS Linux jobs + CPUs uint `toml:"cpus"` // virtual CPUs (default: 2) + MemoryMB uint64 `toml:"memory_mb"` // memory in MB (default: 2048) DiskSizeGB uint64 `toml:"disk_size_gb"` // sparse disk size in GB (default: 50) } @@ -182,10 +221,10 @@ type MacOSVMToml struct { // Apple-signed IPSW on first boot and installs stock macOS into // /vm/macos/base.img. Distinct from the OCI base image // overlaid per job — that's fetched from the job's image label. - DiskImage string `toml:"disk_image"` - CPUs uint `toml:"cpus"` // CPUs per VM (default: 4) - MemoryMB uint64 `toml:"memory_mb"` // memory per VM in MB (default: 8192) - MaxConcurrent int `toml:"max_concurrent"` // max simultaneous macOS VMs (default: auto-detected from host CPUs) + DiskImage string `toml:"disk_image"` + CPUs uint `toml:"cpus"` // CPUs per VM (default: 4) + MemoryMB uint64 `toml:"memory_mb"` // memory per VM in MB (default: 8192) + MaxConcurrent int `toml:"max_concurrent"` // max simultaneous macOS VMs (default: auto-detected from host CPUs) } type GitHubConfig struct { diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index b479dd4..8dad1a2 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -5,6 +5,7 @@ import ( "context" "os" "path/filepath" + goruntime "runtime" "slices" "testing" "time" @@ -1766,3 +1767,83 @@ func TestParsedPollInterval_Hours(t *testing.T) { t.Errorf("PollInterval(2h) = %v, want 2h", d) } } + +func TestResolvedAllowPrivileged_DefaultByOS(t *testing.T) { + d := DindConfig{} + got := d.ResolvedAllowPrivileged() + wantTrue := goruntime.GOOS != "linux" + if got != wantTrue { + t.Errorf("default ResolvedAllowPrivileged on GOOS=%s = %v, want %v", goruntime.GOOS, got, wantTrue) + } +} + +func TestResolvedAllowPrivileged_ExplicitTrueWins(t *testing.T) { + yes := true + d := DindConfig{AllowPrivileged: &yes} + if !d.ResolvedAllowPrivileged() { + t.Error("explicit allow_privileged=true should resolve true on every OS") + } +} + +func TestResolvedAllowPrivileged_ExplicitFalseWins(t *testing.T) { + no := false + d := DindConfig{AllowPrivileged: &no} + if d.ResolvedAllowPrivileged() { + t.Errorf("explicit allow_privileged=false should resolve false on every OS (GOOS=%s)", goruntime.GOOS) + } +} + +func TestLoad_DindAllowPrivileged_OmittedUsesPlatformDefault(t *testing.T) { + t.Setenv("GITHUB_TOKEN", "ghp_test123") + tmp := t.TempDir() + path := filepath.Join(tmp, "config.toml") + if err := os.WriteFile(path, []byte(` +[github] +owner = "testorg" + +[dind] +enabled = true +`), 0644); err != nil { + t.Fatal(err) + } + cfg, err := Load(path) + if err != nil { + t.Fatalf("Load() error: %v", err) + } + if cfg.Dind.AllowPrivileged != nil { + t.Errorf("AllowPrivileged ptr = %v, want nil (key not set in TOML)", *cfg.Dind.AllowPrivileged) + } + want := goruntime.GOOS != "linux" + if got := cfg.Dind.ResolvedAllowPrivileged(); got != want { + t.Errorf("ResolvedAllowPrivileged on GOOS=%s = %v, want %v", goruntime.GOOS, got, want) + } +} + +func TestLoad_DindAllowPrivileged_ExplicitFalseOnAnyOS(t *testing.T) { + t.Setenv("GITHUB_TOKEN", "ghp_test123") + tmp := t.TempDir() + path := filepath.Join(tmp, "config.toml") + if err := os.WriteFile(path, []byte(` +[github] +owner = "testorg" + +[dind] +enabled = true +allow_privileged = false +`), 0644); err != nil { + t.Fatal(err) + } + cfg, err := Load(path) + if err != nil { + t.Fatalf("Load() error: %v", err) + } + if cfg.Dind.AllowPrivileged == nil { + t.Fatal("AllowPrivileged ptr is nil; TOML did not bind the explicit false") + } + if *cfg.Dind.AllowPrivileged { + t.Errorf("AllowPrivileged = true, want false") + } + if cfg.Dind.ResolvedAllowPrivileged() { + t.Error("ResolvedAllowPrivileged() should honor explicit false even on non-Linux hosts") + } +} diff --git a/pkg/dind/containers.go b/pkg/dind/containers.go index db84e09..bd47113 100644 --- a/pkg/dind/containers.go +++ b/pkg/dind/containers.go @@ -111,17 +111,17 @@ type createRequest struct { } type hostConfig struct { - Binds []string `json:"Binds"` - NetworkMode string `json:"NetworkMode"` - Privileged bool `json:"Privileged"` - SecurityOpt []string `json:"SecurityOpt"` - CapAdd []string `json:"CapAdd"` - Tmpfs map[string]string `json:"Tmpfs"` - PortBindings map[string][]portBinding `json:"PortBindings"` - RestartPolicy *restartPolicy `json:"RestartPolicy"` - Init *bool `json:"Init"` - CgroupnsMode string `json:"CgroupnsMode"` - ExtraHosts []string `json:"ExtraHosts"` + Binds []string `json:"Binds"` + NetworkMode string `json:"NetworkMode"` + Privileged bool `json:"Privileged"` + SecurityOpt []string `json:"SecurityOpt"` + CapAdd []string `json:"CapAdd"` + Tmpfs map[string]string `json:"Tmpfs"` + PortBindings map[string][]portBinding `json:"PortBindings"` + RestartPolicy *restartPolicy `json:"RestartPolicy"` + Init *bool `json:"Init"` + CgroupnsMode string `json:"CgroupnsMode"` + ExtraHosts []string `json:"ExtraHosts"` } type portBinding struct { @@ -207,14 +207,25 @@ func (s *Server) resolveContainerID(nameOrID string) string { return nameOrID } -func (s *Server) handleContainerCreate(w http.ResponseWriter, r *http.Request) { - if s.client == nil { - writeJSON(w, http.StatusInternalServerError, map[string]string{ - "message": "containerd client not available", - }) - return +// checkPrivilegedGate returns a user-facing rejection message and blocked=true +// when the request asks for elevation (Privileged=true or CapAdd) but the gate +// is closed (allowPrivileged=false). Otherwise blocked=false and msg is empty. +// Pure function so the handler stays simple and tests don't need a containerd +// client to exercise the gate logic. +func checkPrivilegedGate(allowPrivileged bool, hc *hostConfig) (msg string, blocked bool) { + if allowPrivileged || hc == nil { + return "", false } + if hc.Privileged { + return "privileged containers are disabled on this host (set dind.allow_privileged = true in ephemerd config to enable)", true + } + if len(hc.CapAdd) > 0 { + return fmt.Sprintf("--cap-add (%v) is disabled on this host (set dind.allow_privileged = true in ephemerd config to enable)", hc.CapAdd), true + } + return "", false +} +func (s *Server) handleContainerCreate(w http.ResponseWriter, r *http.Request) { var req createRequest if err := json.NewDecoder(r.Body).Decode(&req); err != nil { writeJSON(w, http.StatusBadRequest, map[string]string{ @@ -230,6 +241,22 @@ func (s *Server) handleContainerCreate(w http.ResponseWriter, r *http.Request) { return } + // Privileged-elevation gate runs before the client check: it's a + // request-shape validation, not a runtime-state check. This also + // makes the unit tests trivial — they don't need a containerd client. + if msg, blocked := checkPrivilegedGate(s.allowPrivileged, req.HostConfig); blocked { + s.log.Warn("rejecting elevated container request", "image", req.Image, "reason", msg) + writeJSON(w, http.StatusForbidden, map[string]string{"message": msg}) + return + } + + if s.client == nil { + writeJSON(w, http.StatusInternalServerError, map[string]string{ + "message": "containerd client not available", + }) + return + } + id := generateContainerID() name := strings.TrimPrefix(r.URL.Query().Get("name"), "/") @@ -1563,4 +1590,3 @@ func writeContainerHosts(entry *containerEntry) error { return os.WriteFile(entry.HostsPath, []byte(b.String()), 0o644) } - diff --git a/pkg/dind/dind.go b/pkg/dind/dind.go index aab80c7..5fb5e86 100644 --- a/pkg/dind/dind.go +++ b/pkg/dind/dind.go @@ -3,7 +3,16 @@ // Each job gets its own server instance listening on a Unix socket. // The socket is bind-mounted into the job container at /var/run/docker.sock. // Docker CLI calls inside the container are translated into containerd -// operations on the host — no real Docker daemon, no privileged containers. +// operations on the host — no real Docker daemon. +// +// Sibling containers created through this API can opt into the full +// docker --privileged elevation stack (all caps, all devices, seccomp +// and apparmor unconfined, writable sysfs/cgroupfs) when the host is +// configured to allow it. See Server.allowPrivileged and +// config.DindConfig.AllowPrivileged for the threat model — the short +// version is that on Windows and macOS hosts the dind containerd lives +// inside a managed Linux VM, so an escape stays in that VM; on a Linux +// host with no VM fence the default is to deny privileged requests. package dind import ( @@ -33,21 +42,22 @@ const sharedNamespace = "ephemerd" // Server is a per-job fake Docker daemon. type Server struct { - jobID string - jobNamespace string // per-job containerd namespace for isolation - cacheNamespace string // per-(provider,repo) shared image cache namespace; empty disables caching - sockPath string // host-side unix socket path (Linux/macOS only) - endpoint string // what the container should set DOCKER_HOST to (e.g. "tcp://gw:port" on Windows) - listener net.Listener - server *http.Server - client *client.Client - network *networking.Manager - buildkit *buildkit.Server // shared embedded BuildKit solver (nil → fall back to platform default) - runnerNetNS string // path to runner container's net namespace; used to install DNAT rules for port bindings - log *slog.Logger + jobID string + jobNamespace string // per-job containerd namespace for isolation + cacheNamespace string // per-(provider,repo) shared image cache namespace; empty disables caching + sockPath string // host-side unix socket path (Linux/macOS only) + endpoint string // what the container should set DOCKER_HOST to (e.g. "tcp://gw:port" on Windows) + listener net.Listener + server *http.Server + client *client.Client + network *networking.Manager + buildkit *buildkit.Server // shared embedded BuildKit solver (nil → fall back to platform default) + runnerNetNS string // path to runner container's net namespace; used to install DNAT rules for port bindings + allowPrivileged bool // gate for docker run --privileged / --cap-add; see config.DindConfig.AllowPrivileged + log *slog.Logger mu sync.Mutex - images map[string]*imageEntry // in-memory image store scoped to this job + images map[string]*imageEntry // in-memory image store scoped to this job containers map[string]*containerEntry // containers created through this socket execs map[string]*execEntry // exec processes inside containers networks map[string]*networkEntry // Docker networks (logical, in-memory) @@ -100,6 +110,13 @@ type Config struct { // the sibling's container IP. Empty disables port forwarding. RunnerNetNS string + // AllowPrivileged controls whether sibling containers may opt into + // the full elevation stack via HostConfig.Privileged or via + // HostConfig.CapAdd. When false, requests carrying either are + // rejected with HTTP 403. See config.DindConfig.AllowPrivileged for + // the threat model. + AllowPrivileged bool + Log *slog.Logger } @@ -122,18 +139,19 @@ func New(cfg Config) (*Server, error) { jobID: cfg.JobID, // containerd namespace name regex (^[A-Za-z0-9]+(?:[._-](?:[A-Za-z0-9]+))*$) // rejects slashes. Use hyphens to namespace per-job dind state. - jobNamespace: "ephemerd-dind-" + cfg.JobID, - cacheNamespace: CacheNamespace(cfg.Provider, cfg.Repo), - sockPath: sockPath, - client: cfg.Client, - network: cfg.Network, - buildkit: cfg.BuildKit, - runnerNetNS: cfg.RunnerNetNS, - log: cfg.Log.With("component", "dind", "job_id", cfg.JobID), - images: make(map[string]*imageEntry), - containers: make(map[string]*containerEntry), - execs: make(map[string]*execEntry), - networks: make(map[string]*networkEntry), + jobNamespace: "ephemerd-dind-" + cfg.JobID, + cacheNamespace: CacheNamespace(cfg.Provider, cfg.Repo), + sockPath: sockPath, + client: cfg.Client, + network: cfg.Network, + buildkit: cfg.BuildKit, + runnerNetNS: cfg.RunnerNetNS, + allowPrivileged: cfg.AllowPrivileged, + log: cfg.Log.With("component", "dind", "job_id", cfg.JobID), + images: make(map[string]*imageEntry), + containers: make(map[string]*containerEntry), + execs: make(map[string]*execEntry), + networks: make(map[string]*networkEntry), } s.initDefaultBridgeNetwork() return s, nil diff --git a/pkg/dind/dind_test.go b/pkg/dind/dind_test.go index 6f13d90..067d9de 100644 --- a/pkg/dind/dind_test.go +++ b/pkg/dind/dind_test.go @@ -230,10 +230,12 @@ func TestContainerCreateNoImage(t *testing.T) { } }() - // No containerd client → 500 before image check. - // This validates that the request parsing works. - if resp.StatusCode != http.StatusInternalServerError { - t.Errorf("status = %d, want 500", resp.StatusCode) + // Missing Image field → 400 Bad Request from request validation. + // (Previously this asserted 500 because the nil-client check ran + // first; the handler now validates request shape before checking + // runtime state, so 400 is the user-facing result.) + if resp.StatusCode != http.StatusBadRequest { + t.Errorf("status = %d, want 400", resp.StatusCode) } } diff --git a/pkg/dind/privileged_gate_test.go b/pkg/dind/privileged_gate_test.go new file mode 100644 index 0000000..c6d034f --- /dev/null +++ b/pkg/dind/privileged_gate_test.go @@ -0,0 +1,135 @@ +package dind + +import ( + "bytes" + "encoding/json" + "io" + "log/slog" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +// gateTestServer returns a Server with only the fields handleContainerCreate +// looks at on the 403 short-circuit path: a non-nil logger and the gate flag. +// The client stays nil so the handler returns 500 from the early nil-check — +// fine for the 403 tests since they should never reach that branch. +func gateTestServer(allow bool) *Server { + return &Server{ + allowPrivileged: allow, + log: slog.New(slog.NewTextHandler(io.Discard, nil)), + } +} + +func postCreate(t *testing.T, s *Server, body createRequest) *httptest.ResponseRecorder { + t.Helper() + raw, err := json.Marshal(body) + if err != nil { + t.Fatalf("marshal: %v", err) + } + req := httptest.NewRequest(http.MethodPost, "/containers/create", bytes.NewReader(raw)) + w := httptest.NewRecorder() + s.handleContainerCreate(w, req) + return w +} + +// 403 short-circuit tests: these exercise the full handler path because the +// gate fires before the nil-client check. + +func TestHandleContainerCreate_PrivilegedDeniedWhenGateClosed(t *testing.T) { + s := gateTestServer(false) + w := postCreate(t, s, createRequest{ + Image: "alpine:3.20", + HostConfig: &hostConfig{Privileged: true}, + }) + if w.Code != http.StatusForbidden { + t.Fatalf("status = %d, want 403; body=%s", w.Code, w.Body.String()) + } + if !bytes.Contains(w.Body.Bytes(), []byte("privileged containers are disabled")) { + t.Errorf("body did not mention privileged disabled: %s", w.Body.String()) + } +} + +func TestHandleContainerCreate_CapAddDeniedWhenGateClosed(t *testing.T) { + s := gateTestServer(false) + w := postCreate(t, s, createRequest{ + Image: "alpine:3.20", + HostConfig: &hostConfig{CapAdd: []string{"SYS_ADMIN"}}, + }) + if w.Code != http.StatusForbidden { + t.Fatalf("status = %d, want 403; body=%s", w.Code, w.Body.String()) + } + if !bytes.Contains(w.Body.Bytes(), []byte("SYS_ADMIN")) { + t.Errorf("body should echo requested cap: %s", w.Body.String()) + } +} + +// Pure-function tests for the gate logic. These don't need a Server or a +// containerd client — important because the non-gated paths in +// handleContainerCreate proceed to GetImage which panics on a nil gRPC conn. + +func TestCheckPrivilegedGate_AllowedPassesEverything(t *testing.T) { + cases := []struct { + name string + hc *hostConfig + }{ + {"nil HostConfig", nil}, + {"empty HostConfig", &hostConfig{}}, + {"Privileged=true", &hostConfig{Privileged: true}}, + {"CapAdd=SYS_ADMIN", &hostConfig{CapAdd: []string{"SYS_ADMIN"}}}, + {"both", &hostConfig{Privileged: true, CapAdd: []string{"NET_ADMIN", "SYS_ADMIN"}}}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + msg, blocked := checkPrivilegedGate(true, tc.hc) + if blocked { + t.Errorf("blocked = true with gate open; msg=%q", msg) + } + if msg != "" { + t.Errorf("msg = %q, want empty when not blocked", msg) + } + }) + } +} + +func TestCheckPrivilegedGate_ClosedRejectsPrivileged(t *testing.T) { + msg, blocked := checkPrivilegedGate(false, &hostConfig{Privileged: true}) + if !blocked { + t.Fatal("blocked = false, want true") + } + if !strings.Contains(msg, "privileged containers are disabled") { + t.Errorf("msg = %q, want it to mention 'privileged containers are disabled'", msg) + } +} + +func TestCheckPrivilegedGate_ClosedRejectsCapAdd(t *testing.T) { + msg, blocked := checkPrivilegedGate(false, &hostConfig{CapAdd: []string{"SYS_ADMIN"}}) + if !blocked { + t.Fatal("blocked = false, want true") + } + if !strings.Contains(msg, "SYS_ADMIN") { + t.Errorf("msg = %q, want it to echo the requested cap", msg) + } +} + +func TestCheckPrivilegedGate_ClosedAllowsNilHostConfig(t *testing.T) { + // docker run without -H may omit HostConfig entirely; the gate must + // not deref nil. + msg, blocked := checkPrivilegedGate(false, nil) + if blocked { + t.Errorf("nil HostConfig blocked: msg=%q", msg) + } +} + +func TestCheckPrivilegedGate_ClosedAllowsZeroHostConfig(t *testing.T) { + // The common case: Docker CLI always sends HostConfig, mostly with + // zero fields. The gate must let it through. + msg, blocked := checkPrivilegedGate(false, &hostConfig{ + Binds: []string{"/host:/c"}, + NetworkMode: "bridge", + }) + if blocked { + t.Errorf("non-elevated HostConfig blocked: msg=%q", msg) + } +} diff --git a/pkg/runtime/runtime.go b/pkg/runtime/runtime.go index bac7b2c..20126bb 100644 --- a/pkg/runtime/runtime.go +++ b/pkg/runtime/runtime.go @@ -10,9 +10,9 @@ import ( "os" "os/exec" "path/filepath" + goruntime "runtime" "strings" "sync" - goruntime "runtime" "github.com/containerd/containerd/v2/client" "github.com/containerd/containerd/v2/core/containers" @@ -65,9 +65,14 @@ type Config struct { // DataDir must be rewritten to that VM-side path. When empty, falls // back to DataDir. ContainerDataDir string - DindEnabled bool // mount a fake Docker socket into each container - CacheProxyEnv []string // extra env vars from cache proxies (e.g., GOPROXY=...) - Network *networking.Manager + DindEnabled bool // mount a fake Docker socket into each container + // DindAllowPrivileged is forwarded to each per-job dind.Server. + // When false, requests carrying HostConfig.Privileged=true or + // HostConfig.CapAdd are rejected with HTTP 403. See + // config.DindConfig.AllowPrivileged for the threat model. + DindAllowPrivileged bool + CacheProxyEnv []string // extra env vars from cache proxies (e.g., GOPROXY=...) + Network *networking.Manager // WindowsMemoryBytes is the memory limit for Hyper-V isolated Windows // runner containers. Zero leaves the OCI spec field unset, which gives // the HCS default (~1 GB) — too small for MSVC builds. Caller should @@ -101,8 +106,8 @@ func (r *Runtime) Client() *client.Client { // RunnerEnv represents a running runner environment. type RunnerEnv struct { ID string - Netns string // network namespace path (Linux only) - RunnerDir string // per-job runner copy, cleaned up on destroy + Netns string // network namespace path (Linux only) + RunnerDir string // per-job runner copy, cleaned up on destroy Dind *dind.Server // per-job fake Docker daemon (nil if disabled) Container client.Container Task client.Task @@ -691,14 +696,15 @@ func (r *Runtime) Create(ctx context.Context, cfg CreateConfig) (*RunnerEnv, err if r.cfg.DindEnabled { var err error dindServer, err = dind.New(dind.Config{ - JobID: id, - Provider: cfg.Provider, - Repo: cfg.Repo, - DataDir: r.cfg.DataDir, - Client: r.client, - Network: r.cfg.Network, - BuildKit: r.cfg.BuildKit, - Log: r.cfg.Log, + JobID: id, + Provider: cfg.Provider, + Repo: cfg.Repo, + DataDir: r.cfg.DataDir, + Client: r.client, + Network: r.cfg.Network, + BuildKit: r.cfg.BuildKit, + AllowPrivileged: r.cfg.DindAllowPrivileged, + Log: r.cfg.Log, }) if err != nil { return nil, fmt.Errorf("creating dind server for %s: %w", id, err)