From 0ec1d7b6b7d00de7eff9e83a43abb65c0710b5d1 Mon Sep 17 00:00:00 2001 From: Luther Monson Date: Thu, 21 May 2026 21:30:00 -0700 Subject: [PATCH] feat(runtime): add configurable rlimits (nofile/nproc) for jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a [runtime.rlimits] config block with `nofile` and `nproc` keys. Both default to 1024. For nofile that matches containerd's built-in OCI spec, so an empty config leaves RLIMIT_NOFILE unchanged; the built-in spec sets no RLIMIT_NPROC entry, so the nproc default is a newly applied cap rather than a no-op. Higher values let build tools call `ulimit -n N` up to the configured ceiling without needing CAP_SYS_RESOURCE, which we deliberately don't grant. The Linux spec helper replaces (rather than appends to) `spec.Process.Rlimits` so the containerd-default RLIMIT_NOFILE entry from `oci.WithDefaultSpecForPlatform` doesn't end up duplicated alongside our own. Both soft and hard are set equal; raising the hard limit from inside the container requires CAP_SYS_RESOURCE, which is intentionally not in containerCapabilities. On Windows and macOS the helper is a no-op — HCS and Vz use different resource-limit models that are configured at the VM/utility-VM level. Local CI notes: lint and tests pass cross-compiled for linux. `mage test` on this Windows host hits the documented pkcs11/ocicrypt cgo preprocessing failure for packages that transitively import it (cmd/ephemerd, pkg/containerd, pkg/dind, pkg/workflow); GOOS=linux go test -run xxx compiles them cleanly. 
--- cmd/ephemerd/main.go | 2 + pkg/config/config.go | 42 ++++++++++++ pkg/config/config_test.go | 94 +++++++++++++++++++++++++++ pkg/runtime/rlimits_linux.go | 44 +++++++++++++ pkg/runtime/rlimits_linux_test.go | 103 ++++++++++++++++++++++++++++++ pkg/runtime/rlimits_other.go | 15 +++++ pkg/runtime/runtime.go | 6 ++ 7 files changed, 306 insertions(+) create mode 100644 pkg/runtime/rlimits_linux.go create mode 100644 pkg/runtime/rlimits_linux_test.go create mode 100644 pkg/runtime/rlimits_other.go diff --git a/cmd/ephemerd/main.go b/cmd/ephemerd/main.go index 8567d4e..1932a5b 100644 --- a/cmd/ephemerd/main.go +++ b/cmd/ephemerd/main.go @@ -277,6 +277,7 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP LogDir: joinPath(configDir, "logs"), DataDir: configDir, DindEnabled: cfg.Dind.Enabled, + Rlimits: cfg.Runtime.Rlimits.Resolved(), Network: net, WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(), WindowsCPUs: cfg.Runner.Windows.CPUCount(), @@ -453,6 +454,7 @@ func serve(ctx context.Context, configFile, imagesDirFlag string, containerdTCPP ContainerDataDir: containerDataDir, DindEnabled: cfg.Dind.Enabled, CacheProxyEnv: cacheProxyEnvVars, + Rlimits: cfg.Runtime.Rlimits.Resolved(), Network: net, WindowsMemoryBytes: cfg.Runner.Windows.MemoryBytes(), WindowsCPUs: cfg.Runner.Windows.CPUCount(), diff --git a/pkg/config/config.go b/pkg/config/config.go index 0a6e295..f622adb 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -26,6 +26,7 @@ type Config struct { VM VMConfig `toml:"vm"` Dind DindConfig `toml:"dind"` ModuleProxy ModuleProxyConfig `toml:"module_proxy"` + Runtime RuntimeConfig `toml:"runtime"` Runner RunnerConfig `toml:"runner"` Metrics MetricsConfig `toml:"metrics"` Log LogConfig `toml:"log"` @@ -97,6 +98,47 @@ type ContainerdConfig struct { // Reserved for future containerd-specific settings (e.g. 
snapshotter overrides) } +// RuntimeConfig configures behavior of the per-job container runtime — +// things that apply to the OCI spec rather than to a specific subsystem +// like dind or networking. +type RuntimeConfig struct { + Rlimits RuntimeRlimits `toml:"rlimits"` +} + +// RuntimeRlimits sets POSIX resource limits (RLIMIT_*) on each runner +// container's OCI spec. Unset fields resolve to 1024: for nofile that is +// containerd's built-in value; nproc is a new cap that spec doesn't set. +// +// Set higher when CI workloads need more file descriptors or processes +// than containerd's defaults allow. Common case: a build tool calling +// `ulimit -n 2048` to raise its open-file ceiling. That fails with +// "Operation not permitted" if the container's hard limit is 1024 — +// raising the hard limit needs CAP_SYS_RESOURCE, which we deliberately +// don't grant. Setting nofile higher here lets the same `ulimit` call +// succeed without granting the capability, because lowering is always +// allowed and the OCI hard limit is now generous. +type RuntimeRlimits struct { + // Nofile is RLIMIT_NOFILE (max open file descriptors). Both soft + // and hard get set to this value. Default 1024 (containerd default). + Nofile int64 `toml:"nofile"` + // Nproc is RLIMIT_NPROC (max processes/threads for the container's + // user). Both soft and hard get set to this value. Default 1024. + Nproc int64 `toml:"nproc"` +} + +// Resolved returns the rlimits with defaults filled in for any unset +// (zero or negative) field. Always returns positive values so callers +// can blindly emit OCI rlimit entries. +func (r RuntimeRlimits) Resolved() RuntimeRlimits { + if r.Nofile <= 0 { + r.Nofile = 1024 + } + if r.Nproc <= 0 { + r.Nproc = 1024 + } + return r +} + // DindConfig configures the fake Docker daemon mounted into job containers. 
type DindConfig struct { Enabled bool `toml:"enabled"` // mount /var/run/docker.sock with a fake Docker API diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index b479dd4..4c256fb 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -1766,3 +1766,97 @@ func TestParsedPollInterval_Hours(t *testing.T) { t.Errorf("PollInterval(2h) = %v, want 2h", d) } } + +func TestRuntimeRlimitsResolved_Defaults(t *testing.T) { + got := RuntimeRlimits{}.Resolved() + if got.Nofile != 1024 { + t.Errorf("Nofile = %d, want 1024 (containerd default)", got.Nofile) + } + if got.Nproc != 1024 { + t.Errorf("Nproc = %d, want 1024 default", got.Nproc) + } +} + +func TestRuntimeRlimitsResolved_Explicit(t *testing.T) { + got := RuntimeRlimits{Nofile: 4096, Nproc: 8192}.Resolved() + if got.Nofile != 4096 { + t.Errorf("Nofile = %d, want 4096", got.Nofile) + } + if got.Nproc != 8192 { + t.Errorf("Nproc = %d, want 8192", got.Nproc) + } +} + +func TestRuntimeRlimitsResolved_NegativeFallsBack(t *testing.T) { + got := RuntimeRlimits{Nofile: -1, Nproc: -100}.Resolved() + if got.Nofile != 1024 { + t.Errorf("Nofile(-1) resolved to %d, want 1024", got.Nofile) + } + if got.Nproc != 1024 { + t.Errorf("Nproc(-100) resolved to %d, want 1024", got.Nproc) + } +} + +func TestRuntimeRlimitsResolved_MixedZeroAndExplicit(t *testing.T) { + // Only one field set: the other should fall back without disturbing + // the explicit one. 
+ got := RuntimeRlimits{Nofile: 65536}.Resolved() + if got.Nofile != 65536 { + t.Errorf("Nofile = %d, want 65536 (preserved)", got.Nofile) + } + if got.Nproc != 1024 { + t.Errorf("Nproc = %d, want 1024 (default fill)", got.Nproc) + } +} + +func TestLoad_RuntimeRlimits(t *testing.T) { + t.Setenv("GITHUB_TOKEN", "ghp_test123") + tmp := t.TempDir() + path := filepath.Join(tmp, "config.toml") + if err := os.WriteFile(path, []byte(` +[github] +owner = "testorg" + +[runtime.rlimits] +nofile = 4096 +nproc = 2048 +`), 0644); err != nil { + t.Fatal(err) + } + + cfg, err := Load(path) + if err != nil { + t.Fatalf("Load() error: %v", err) + } + if cfg.Runtime.Rlimits.Nofile != 4096 { + t.Errorf("Rlimits.Nofile = %d, want 4096", cfg.Runtime.Rlimits.Nofile) + } + if cfg.Runtime.Rlimits.Nproc != 2048 { + t.Errorf("Rlimits.Nproc = %d, want 2048", cfg.Runtime.Rlimits.Nproc) + } +} + +func TestLoad_RuntimeRlimits_Omitted(t *testing.T) { + // Empty config — Resolved() must still produce 1024/1024 so callers + // never have to special-case "no [runtime] block in config.toml". 
+ t.Setenv("GITHUB_TOKEN", "ghp_test123") + tmp := t.TempDir() + path := filepath.Join(tmp, "config.toml") + if err := os.WriteFile(path, []byte(` +[github] +owner = "testorg" +`), 0644); err != nil { + t.Fatal(err) + } + cfg, err := Load(path) + if err != nil { + t.Fatalf("Load() error: %v", err) + } + if cfg.Runtime.Rlimits.Nofile != 0 { + t.Errorf("Rlimits.Nofile (raw) = %d, want 0 before Resolved()", cfg.Runtime.Rlimits.Nofile) + } + resolved := cfg.Runtime.Rlimits.Resolved() + if resolved.Nofile != 1024 || resolved.Nproc != 1024 { + t.Errorf("Resolved() = %+v, want {1024, 1024}", resolved) + } +} diff --git a/pkg/runtime/rlimits_linux.go b/pkg/runtime/rlimits_linux.go new file mode 100644 index 0000000..b9fa4dc --- /dev/null +++ b/pkg/runtime/rlimits_linux.go @@ -0,0 +1,44 @@ +//go:build linux + +package runtime + +import ( + "context" + + "github.com/containerd/containerd/v2/core/containers" + "github.com/containerd/containerd/v2/pkg/oci" + "github.com/ephpm/ephemerd/pkg/config" + ocispec "github.com/opencontainers/runtime-spec/specs-go" +) + +// rlimitsOpts sets RLIMIT_NOFILE and RLIMIT_NPROC on the container's OCI +// process spec. We deliberately replace the rlimits slice (rather than +// append) so the containerd default RLIMIT_NOFILE=1024 entry from +// oci.WithDefaultSpecForPlatform doesn't end up duplicated. +// +// The hard limit is set equal to the soft limit. Raising the hard limit +// from inside the container requires CAP_SYS_RESOURCE, which we +// intentionally don't grant — see containerCapabilities. 
+func rlimitsOpts(rl config.RuntimeRlimits) []oci.SpecOpts { + resolved := rl.Resolved() + return []oci.SpecOpts{ + func(_ context.Context, _ oci.Client, _ *containers.Container, s *oci.Spec) error { + if s.Process == nil { + s.Process = &ocispec.Process{} + } + s.Process.Rlimits = []ocispec.POSIXRlimit{ + { + Type: "RLIMIT_NOFILE", + Soft: uint64(resolved.Nofile), + Hard: uint64(resolved.Nofile), + }, + { + Type: "RLIMIT_NPROC", + Soft: uint64(resolved.Nproc), + Hard: uint64(resolved.Nproc), + }, + } + return nil + }, + } +} diff --git a/pkg/runtime/rlimits_linux_test.go b/pkg/runtime/rlimits_linux_test.go new file mode 100644 index 0000000..9c7ce1c --- /dev/null +++ b/pkg/runtime/rlimits_linux_test.go @@ -0,0 +1,103 @@ +//go:build linux + +package runtime + +import ( + "context" + "testing" + + "github.com/containerd/containerd/v2/pkg/oci" + "github.com/ephpm/ephemerd/pkg/config" + ocispec "github.com/opencontainers/runtime-spec/specs-go" +) + +func TestRlimitsOpts_AppliesConfiguredValues(t *testing.T) { + spec := &oci.Spec{Process: &ocispec.Process{}} + opts := rlimitsOpts(config.RuntimeRlimits{Nofile: 4096, Nproc: 2048}) + for _, opt := range opts { + if err := opt(context.Background(), nil, nil, spec); err != nil { + t.Fatalf("opt: %v", err) + } + } + rls := spec.Process.Rlimits + if len(rls) != 2 { + t.Fatalf("len(Rlimits) = %d, want 2: %+v", len(rls), rls) + } + want := map[string]uint64{"RLIMIT_NOFILE": 4096, "RLIMIT_NPROC": 2048} + for _, rl := range rls { + w, ok := want[rl.Type] + if !ok { + t.Errorf("unexpected rlimit %q", rl.Type) + continue + } + if rl.Soft != w || rl.Hard != w { + t.Errorf("%s: soft=%d hard=%d, want soft=hard=%d", rl.Type, rl.Soft, rl.Hard, w) + } + } +} + +func TestRlimitsOpts_AppliesDefaultsWhenZero(t *testing.T) { + // Zero values must produce the containerd-default 1024 entries — + // emitting Rlimits with Soft=Hard=0 would cripple the container. 
+ spec := &oci.Spec{Process: &ocispec.Process{}} + opts := rlimitsOpts(config.RuntimeRlimits{}) + for _, opt := range opts { + if err := opt(context.Background(), nil, nil, spec); err != nil { + t.Fatalf("opt: %v", err) + } + } + for _, rl := range spec.Process.Rlimits { + if rl.Soft != 1024 || rl.Hard != 1024 { + t.Errorf("%s: soft=%d hard=%d, want 1024/1024", rl.Type, rl.Soft, rl.Hard) + } + } +} + +func TestRlimitsOpts_ReplacesDefaultRlimits(t *testing.T) { + // oci.WithDefaultSpecForPlatform pre-populates RLIMIT_NOFILE=1024. + // Our opt must overwrite (not append) so we don't end up with two + // RLIMIT_NOFILE entries — the OCI runtime spec says a runtime MUST + // error on duplicate rlimit types. + spec := &oci.Spec{Process: &ocispec.Process{ + Rlimits: []ocispec.POSIXRlimit{ + {Type: "RLIMIT_NOFILE", Soft: 1024, Hard: 1024}, + }, + }} + opts := rlimitsOpts(config.RuntimeRlimits{Nofile: 8192, Nproc: 4096}) + for _, opt := range opts { + if err := opt(context.Background(), nil, nil, spec); err != nil { + t.Fatalf("opt: %v", err) + } + } + if len(spec.Process.Rlimits) != 2 { + t.Errorf("len(Rlimits) = %d, want 2 (no duplicates)", len(spec.Process.Rlimits)) + } + seen := map[string]int{} + for _, rl := range spec.Process.Rlimits { + seen[rl.Type]++ + } + for k, n := range seen { + if n != 1 { + t.Errorf("rlimit %s appears %d times, want 1", k, n) + } + } +} + +func TestRlimitsOpts_NilProcessSpec(t *testing.T) { + // Defensive: WithDefaultSpecForPlatform always sets Process, but the + // helper should not panic if someone composes opts in a different + // order in the future. 
+ spec := &oci.Spec{} + opts := rlimitsOpts(config.RuntimeRlimits{Nofile: 2048, Nproc: 1024}) + for _, opt := range opts { + if err := opt(context.Background(), nil, nil, spec); err != nil { + t.Fatalf("opt: %v", err) + } + } + if spec.Process == nil { + t.Fatal("Process is nil after rlimitsOpts ran") + } + if len(spec.Process.Rlimits) != 2 { + t.Errorf("len(Rlimits) = %d, want 2", len(spec.Process.Rlimits)) + } +} diff --git a/pkg/runtime/rlimits_other.go b/pkg/runtime/rlimits_other.go new file mode 100644 index 0000000..0a46a6b --- /dev/null +++ b/pkg/runtime/rlimits_other.go @@ -0,0 +1,15 @@ +//go:build !linux + +package runtime + +import ( + "github.com/containerd/containerd/v2/pkg/oci" + "github.com/ephpm/ephemerd/pkg/config" +) + +// rlimitsOpts is a no-op on non-Linux platforms. Windows Hyper-V isolated +// containers and macOS Vz VMs don't use the POSIX rlimit model — host-side +// limits are governed by the VM/HCS configuration instead. +func rlimitsOpts(_ config.RuntimeRlimits) []oci.SpecOpts { + return nil +} diff --git a/pkg/runtime/runtime.go b/pkg/runtime/runtime.go index bac7b2c..ae239bb 100644 --- a/pkg/runtime/runtime.go +++ b/pkg/runtime/runtime.go @@ -22,6 +22,7 @@ import ( "github.com/containerd/containerd/v2/pkg/namespaces" "github.com/containerd/containerd/v2/pkg/oci" "github.com/ephpm/ephemerd/pkg/buildkit" + "github.com/ephpm/ephemerd/pkg/config" "github.com/ephpm/ephemerd/pkg/dind" "github.com/ephpm/ephemerd/pkg/networking" craneTarball "github.com/google/go-containerregistry/pkg/v1/tarball" @@ -67,6 +68,10 @@ type Config struct { ContainerDataDir string DindEnabled bool // mount a fake Docker socket into each container CacheProxyEnv []string // extra env vars from cache proxies (e.g., GOPROXY=...) + // Rlimits sets POSIX resource limits on each runner container's OCI + // process. Zero values fall back to the containerd default (1024). + // Applies on Linux only; ignored on Windows (HCS uses a different model). 
+ Rlimits config.RuntimeRlimits Network *networking.Manager // WindowsMemoryBytes is the memory limit for Hyper-V isolated Windows // runner containers. Zero leaves the OCI spec field unset, which gives @@ -615,6 +620,7 @@ func (r *Runtime) Create(ctx context.Context, cfg CreateConfig) (*RunnerEnv, err oci.WithCapabilities(containerCapabilities), } opts = append(opts, seccompOpts()...) + opts = append(opts, rlimitsOpts(r.cfg.Rlimits)...) switch { case len(cfg.Entrypoint) > 0: // Forge mode: custom entrypoint (e.g. act_runner register + daemon).