Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 44 additions & 42 deletions cmd/ephemerd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ import (
"github.com/ephpm/ephemerd/pkg/providers/forgejo"
"github.com/ephpm/ephemerd/pkg/providers/gitea"
githubProv "github.com/ephpm/ephemerd/pkg/providers/github"
goproxy "github.com/ephpm/ephemerd/pkg/proxies/go"
"github.com/ephpm/ephemerd/pkg/proxies"
goproxy "github.com/ephpm/ephemerd/pkg/proxies/go"
"github.com/ephpm/ephemerd/pkg/runner"
"github.com/ephpm/ephemerd/pkg/runtime"
"github.com/ephpm/ephemerd/pkg/scheduler"
Expand Down Expand Up @@ -271,17 +271,18 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP
}

rt, err := runtime.New(runtime.Config{
Client: ctrdClient,
RunnerDir: rm.Dir(),
RunnerMount: rm.ContainerDir(),
LogDir: joinPath(configDir, "logs"),
DataDir: configDir,
DindEnabled: cfg.Dind.Enabled,
Network: net,
WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(),
WindowsCPUs: cfg.Runner.Windows.CPUCount(),
BuildKit: bk,
Log: log,
Client: ctrdClient,
RunnerDir: rm.Dir(),
RunnerMount: rm.ContainerDir(),
LogDir: joinPath(configDir, "logs"),
DataDir: configDir,
DindEnabled: cfg.Dind.Enabled,
DindAllowPrivileged: cfg.Dind.ResolvedAllowPrivileged(),
Network: net,
WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(),
WindowsCPUs: cfg.Runner.Windows.CPUCount(),
BuildKit: bk,
Log: log,
})
if err != nil {
return fmt.Errorf("creating runtime: %w", err)
Expand Down Expand Up @@ -443,21 +444,22 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP
containerDataDir = "/mnt/ephemerd"
}
rt, err := runtime.New(runtime.Config{
Client: ctrdClient,
RunnerDir: rm.Dir(),
RunnerMount: rm.ContainerDir(),
DefaultImage: cfg.Runner.DefaultImage,
ImagesDir: joinPath(configDir, "images"),
LogDir: joinPath(configDir, "logs"),
DataDir: configDir,
ContainerDataDir: containerDataDir,
DindEnabled: cfg.Dind.Enabled,
CacheProxyEnv: cacheProxyEnvVars,
Network: net,
WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(),
WindowsCPUs: cfg.Runner.Windows.CPUCount(),
BuildKit: bk,
Log: log,
Client: ctrdClient,
RunnerDir: rm.Dir(),
RunnerMount: rm.ContainerDir(),
DefaultImage: cfg.Runner.DefaultImage,
ImagesDir: joinPath(configDir, "images"),
LogDir: joinPath(configDir, "logs"),
DataDir: configDir,
ContainerDataDir: containerDataDir,
DindEnabled: cfg.Dind.Enabled,
DindAllowPrivileged: cfg.Dind.ResolvedAllowPrivileged(),
CacheProxyEnv: cacheProxyEnvVars,
Network: net,
WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(),
WindowsCPUs: cfg.Runner.Windows.CPUCount(),
BuildKit: bk,
Log: log,
})
if err != nil {
return fmt.Errorf("creating runtime: %w", err)
Expand Down Expand Up @@ -524,21 +526,21 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP

// Start scheduler (ties CI provider jobs to container lifecycle)
sched := scheduler.New(scheduler.Config{
Runtime: rt,
Providers: activeProviders,
Artifacts: artifactExtractor,
LinuxDispatcher: linuxDispatcher,
DataDir: configDir,
MaxConcurrent: cfg.Runner.MaxConcurrent,
MaxMacOSVMs: cfg.VM.MacOS.MaxConcurrent,
Labels: cfg.Runner.ExtraLabels,
PollInterval: pollInterval(cfg),
WebhookPort: cfg.Webhook.Port,
WebhookSecret: cfg.Webhook.Secret,
TLSCert: cfg.Webhook.TLSCert,
TLSKey: cfg.Webhook.TLSKey,
Tunnel: tunnelProvider,
TunnelMaxRetries: cfg.Webhook.TunnelMaxRetries,
Runtime: rt,
Providers: activeProviders,
Artifacts: artifactExtractor,
LinuxDispatcher: linuxDispatcher,
DataDir: configDir,
MaxConcurrent: cfg.Runner.MaxConcurrent,
MaxMacOSVMs: cfg.VM.MacOS.MaxConcurrent,
Labels: cfg.Runner.ExtraLabels,
PollInterval: pollInterval(cfg),
WebhookPort: cfg.Webhook.Port,
WebhookSecret: cfg.Webhook.Secret,
TLSCert: cfg.Webhook.TLSCert,
TLSKey: cfg.Webhook.TLSKey,
Tunnel: tunnelProvider,
TunnelMaxRetries: cfg.Webhook.TunnelMaxRetries,
JobTimeout: cfg.Runner.ParsedJobTimeout(),
ShutdownTimeout: cfg.Runner.ParsedShutdownTimeout(),
LogRetention: cfg.Log.LogRetentionDuration(),
Expand Down
97 changes: 68 additions & 29 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,20 @@ import (
)

type Config struct {
GitHub GitHubConfig `toml:"github"`
Forgejo ForgejoConfig `toml:"forgejo"`
Gitea GiteaConfig `toml:"gitea"`
GitLab GitLabConfig `toml:"gitlab"`
Woodpecker WoodpeckerConfig `toml:"woodpecker"`
Webhook WebhookConfig `toml:"webhook"`
Containerd ContainerdConfig `toml:"containerd"`
Network NetworkConfig `toml:"network"`
VM VMConfig `toml:"vm"`
GitHub GitHubConfig `toml:"github"`
Forgejo ForgejoConfig `toml:"forgejo"`
Gitea GiteaConfig `toml:"gitea"`
GitLab GitLabConfig `toml:"gitlab"`
Woodpecker WoodpeckerConfig `toml:"woodpecker"`
Webhook WebhookConfig `toml:"webhook"`
Containerd ContainerdConfig `toml:"containerd"`
Network NetworkConfig `toml:"network"`
VM VMConfig `toml:"vm"`
Dind DindConfig `toml:"dind"`
ModuleProxy ModuleProxyConfig `toml:"module_proxy"`
Runner RunnerConfig `toml:"runner"`
Metrics MetricsConfig `toml:"metrics"`
Log LogConfig `toml:"log"`
Metrics MetricsConfig `toml:"metrics"`
Log LogConfig `toml:"log"`
}

// Provider returns the name of the first configured forge provider.
Expand Down Expand Up @@ -77,14 +77,14 @@ type MetricsConfig struct {
// By default, ephemerd uses polling (tunnel = "none").
// Set tunnel = "localtunnel" or "ngrok" for instant webhook delivery.
type WebhookConfig struct {
Secret string `toml:"secret"` // webhook HMAC secret (auto-generated if empty)
Port int `toml:"port"` // listen port for health endpoint (default 8080)
TLSCert string `toml:"tls_cert"` // TLS certificate path (direct TLS, no tunnel)
TLSKey string `toml:"tls_key"` // TLS private key path
Tunnel string `toml:"tunnel"` // "none" (default, polling), "localtunnel", or "ngrok"
TunnelURL string `toml:"tunnel_url"` // localtunnel: self-hosted server URL
NgrokAuthtoken string `toml:"ngrok_authtoken"` // ngrok auth token (or use NGROK_AUTHTOKEN env)
TunnelMaxRetries int `toml:"tunnel_max_retries"` // max consecutive reconnect failures before falling back to polling (default 5)
Secret string `toml:"secret"` // webhook HMAC secret (auto-generated if empty)
Port int `toml:"port"` // listen port for health endpoint (default 8080)
TLSCert string `toml:"tls_cert"` // TLS certificate path (direct TLS, no tunnel)
TLSKey string `toml:"tls_key"` // TLS private key path
Tunnel string `toml:"tunnel"` // "none" (default, polling), "localtunnel", or "ngrok"
TunnelURL string `toml:"tunnel_url"` // localtunnel: self-hosted server URL
NgrokAuthtoken string `toml:"ngrok_authtoken"` // ngrok auth token (or use NGROK_AUTHTOKEN env)
TunnelMaxRetries int `toml:"tunnel_max_retries"` // max consecutive reconnect failures before falling back to polling (default 5)
}

// NetworkConfig configures container networking.
Expand Down Expand Up @@ -113,6 +113,45 @@ type DindConfig struct {
// Set to 0 to disable eviction (only empty-namespace cleanup runs).
// Default 168h (7 days).
CacheMaxAge time.Duration `toml:"cache_max_age"`

// AllowPrivileged controls whether `docker run --privileged` (or
// HostConfig.Privileged=true / HostConfig.CapAdd) from inside a job
// is honored. When true, a sibling container can request the full
// elevation stack (all caps, all devices, seccomp/apparmor off,
// writable sysfs/cgroupfs) — needed for KIND clusters, nested
// containerd, /dev/fuse-style mounts, etc. When false, such requests
// are rejected with HTTP 403.
//
// SECURITY: a privileged sibling container is effectively root on
// whatever host runs the containerd that backs dind. On Windows and
// macOS hosts that backing containerd lives inside a managed Linux
// VM (WSL2 / Hyper-V / Vz), so an escape only reaches the VM. On a
// Linux host with no VM fence, an escape reaches the bare-metal host
// — set this to false unless every workload is trusted.
//
// Use the pointer form so an empty/missing TOML key is
// distinguishable from an explicit `allow_privileged = false`. See
// ResolvedAllowPrivileged for the default policy.
AllowPrivileged *bool `toml:"allow_privileged"`
}

// ResolvedAllowPrivileged reports whether privileged dind sibling
// containers may be created. An explicit allow_privileged value always
// takes precedence; when the key is absent the answer depends on the
// OS ephemerd was built for.
//
// Platform defaults when the key is unset:
//   - Linux host → false. ephemerd runs directly on the host with no VM
//     fence, so a privileged escape would compromise the bare-metal
//     machine. Operators that trust their workloads (e.g. internal CI
//     for KIND tests) can opt in with `allow_privileged = true`.
//   - Any other host (in practice Windows and macOS) → true. The
//     containerd backing dind runs inside a VM managed by ephemerd
//     (WSL2/Hyper-V on Windows, Vz on macOS), so the worst-case escape
//     stays inside that VM.
func (d *DindConfig) ResolvedAllowPrivileged() bool {
	// A non-nil pointer means the operator set the key; honor it as-is.
	if override := d.AllowPrivileged; override != nil {
		return *override
	}

	// Key omitted: deny only where there is no VM between job and host.
	switch goruntime.GOOS {
	case "linux":
		return false
	default:
		return true
	}
}

// DindCachePruneInterval returns the prune interval with the default
Expand Down Expand Up @@ -148,9 +187,9 @@ type VMConfig struct {
// CrossPlatform enables macOS and Windows VM support. Default true.
// Set to false for platforms like Gitea/Forgejo that only support
// Linux runners — this skips macOS image pulls and Windows VM setup.
CrossPlatform *bool `toml:"cross_platform"`
Linux LinuxVMToml `toml:"linux"`
MacOS MacOSVMToml `toml:"macos"`
CrossPlatform *bool `toml:"cross_platform"`
Linux LinuxVMToml `toml:"linux"`
MacOS MacOSVMToml `toml:"macos"`
}

// CrossPlatformEnabled returns whether macOS/Windows VM support is enabled.
Expand All @@ -165,9 +204,9 @@ func (v *VMConfig) CrossPlatformEnabled() bool {
// LinuxVMToml configures the long-running Linux VM for Linux jobs
// on Windows (Hyper-V) and macOS (Virtualization.framework) hosts.
type LinuxVMToml struct {
Enabled bool `toml:"enabled"` // enable Linux VM for cross-OS Linux jobs
CPUs uint `toml:"cpus"` // virtual CPUs (default: 2)
MemoryMB uint64 `toml:"memory_mb"` // memory in MB (default: 2048)
Enabled bool `toml:"enabled"` // enable Linux VM for cross-OS Linux jobs
CPUs uint `toml:"cpus"` // virtual CPUs (default: 2)
MemoryMB uint64 `toml:"memory_mb"` // memory in MB (default: 2048)
DiskSizeGB uint64 `toml:"disk_size_gb"` // sparse disk size in GB (default: 50)
}

Expand All @@ -182,10 +221,10 @@ type MacOSVMToml struct {
// Apple-signed IPSW on first boot and installs stock macOS into
// <data_dir>/vm/macos/base.img. Distinct from the OCI base image
// overlaid per job — that's fetched from the job's image label.
DiskImage string `toml:"disk_image"`
CPUs uint `toml:"cpus"` // CPUs per VM (default: 4)
MemoryMB uint64 `toml:"memory_mb"` // memory per VM in MB (default: 8192)
MaxConcurrent int `toml:"max_concurrent"` // max simultaneous macOS VMs (default: auto-detected from host CPUs)
DiskImage string `toml:"disk_image"`
CPUs uint `toml:"cpus"` // CPUs per VM (default: 4)
MemoryMB uint64 `toml:"memory_mb"` // memory per VM in MB (default: 8192)
MaxConcurrent int `toml:"max_concurrent"` // max simultaneous macOS VMs (default: auto-detected from host CPUs)
}

type GitHubConfig struct {
Expand Down
81 changes: 81 additions & 0 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"context"
"os"
"path/filepath"
goruntime "runtime"
"slices"
"testing"
"time"
Expand Down Expand Up @@ -1766,3 +1767,83 @@ func TestParsedPollInterval_Hours(t *testing.T) {
t.Errorf("PollInterval(2h) = %v, want 2h", d)
}
}

// TestResolvedAllowPrivileged_DefaultByOS checks that a zero-value
// DindConfig (AllowPrivileged left nil) resolves to false when built for
// Linux and to true for every other GOOS.
func TestResolvedAllowPrivileged_DefaultByOS(t *testing.T) {
	var cfg DindConfig

	// The expected default mirrors the policy: everything except Linux allows.
	want := goruntime.GOOS != "linux"
	if got := cfg.ResolvedAllowPrivileged(); got != want {
		t.Errorf("default ResolvedAllowPrivileged on GOOS=%s = %v, want %v", goruntime.GOOS, got, want)
	}
}

// TestResolvedAllowPrivileged_ExplicitTrueWins checks that an explicit
// AllowPrivileged=true is returned regardless of the platform default.
func TestResolvedAllowPrivileged_ExplicitTrueWins(t *testing.T) {
	allow := true
	cfg := DindConfig{AllowPrivileged: &allow}

	if got := cfg.ResolvedAllowPrivileged(); !got {
		t.Error("explicit allow_privileged=true should resolve true on every OS")
	}
}

// TestResolvedAllowPrivileged_ExplicitFalseWins checks that an explicit
// AllowPrivileged=false is returned regardless of the platform default.
func TestResolvedAllowPrivileged_ExplicitFalseWins(t *testing.T) {
	// new(bool) yields a non-nil pointer to false, i.e. an explicit deny.
	cfg := DindConfig{AllowPrivileged: new(bool)}

	if got := cfg.ResolvedAllowPrivileged(); got {
		t.Errorf("explicit allow_privileged=false should resolve false on every OS (GOOS=%s)", goruntime.GOOS)
	}
}

// TestLoad_DindAllowPrivileged_OmittedUsesPlatformDefault loads a config
// whose [dind] table has no allow_privileged key and checks that the
// pointer field stays nil and that the resolved value follows the
// platform default (false on Linux, true elsewhere).
func TestLoad_DindAllowPrivileged_OmittedUsesPlatformDefault(t *testing.T) {
	// NOTE(review): presumably Load needs a GitHub token for the [github]
	// section to validate — confirm against Load.
	t.Setenv("GITHUB_TOKEN", "ghp_test123")

	const body = `
[github]
owner = "testorg"

[dind]
enabled = true
`
	cfgPath := filepath.Join(t.TempDir(), "config.toml")
	if err := os.WriteFile(cfgPath, []byte(body), 0644); err != nil {
		t.Fatal(err)
	}

	cfg, err := Load(cfgPath)
	if err != nil {
		t.Fatalf("Load() error: %v", err)
	}

	// An omitted key must leave the pointer unset so the default applies.
	if ptr := cfg.Dind.AllowPrivileged; ptr != nil {
		t.Errorf("AllowPrivileged ptr = %v, want nil (key not set in TOML)", *ptr)
	}

	expected := goruntime.GOOS != "linux"
	resolved := cfg.Dind.ResolvedAllowPrivileged()
	if resolved != expected {
		t.Errorf("ResolvedAllowPrivileged on GOOS=%s = %v, want %v", goruntime.GOOS, resolved, expected)
	}
}

// TestLoad_DindAllowPrivileged_ExplicitFalseOnAnyOS loads a config that
// sets allow_privileged = false and checks that the value is bound to a
// non-nil pointer holding false and that the resolved value is false on
// every platform.
func TestLoad_DindAllowPrivileged_ExplicitFalseOnAnyOS(t *testing.T) {
	// NOTE(review): presumably Load needs a GitHub token for the [github]
	// section to validate — confirm against Load.
	t.Setenv("GITHUB_TOKEN", "ghp_test123")

	const body = `
[github]
owner = "testorg"

[dind]
enabled = true
allow_privileged = false
`
	cfgPath := filepath.Join(t.TempDir(), "config.toml")
	if err := os.WriteFile(cfgPath, []byte(body), 0644); err != nil {
		t.Fatal(err)
	}

	cfg, err := Load(cfgPath)
	if err != nil {
		t.Fatalf("Load() error: %v", err)
	}

	// The nil case aborts the test; a true value is reported and the
	// resolved-value check below still runs.
	switch ptr := cfg.Dind.AllowPrivileged; {
	case ptr == nil:
		t.Fatal("AllowPrivileged ptr is nil; TOML did not bind the explicit false")
	case *ptr:
		t.Errorf("AllowPrivileged = true, want false")
	}

	if resolved := cfg.Dind.ResolvedAllowPrivileged(); resolved {
		t.Error("ResolvedAllowPrivileged() should honor explicit false even on non-Linux hosts")
	}
}
Loading