// SelfCgroupPath returns the cgroup v2 unified-mode path of the calling
// process by reading /proc/self/cgroup.
//
// It returns an error if the file cannot be opened or read, or if it holds no
// well-formed unified-mode entry ("0::<path>").
func SelfCgroupPath() (string, error) {
	f, err := os.Open("/proc/self/cgroup")
	if err != nil {
		return "", fmt.Errorf("opening /proc/self/cgroup: %w", err)
	}
	defer f.Close() //nolint:errcheck
	return selfCgroupPathFromReader(f)
}

// selfCgroupPathFromReader scans r line by line and returns the path of the
// first cgroup v2 unified-mode entry, i.e. the text following the "0::"
// prefix with any trailing CR/LF removed.
//
// Errors are returned when:
//   - the first "0::" entry carries a path that does not start with "/"
//     (including an empty path), since such an entry is malformed and must not
//     be used to derive a container cgroup path;
//   - reading from r fails;
//   - no line starts with "0::".
//
// NOTE(review): a "0::" line may also be present on hybrid v1/v2 hosts —
// confirm callers want the unified path in that setup.
func selfCgroupPathFromReader(r io.Reader) (string, error) {
	// cgroup v2 unified-mode line: "0::<path>"
	const unifiedPrefix = "0::"

	s := bufio.NewScanner(r)
	for s.Scan() {
		line := s.Text()
		if !strings.HasPrefix(line, unifiedPrefix) {
			continue
		}
		path := strings.TrimRight(strings.TrimPrefix(line, unifiedPrefix), "\r\n")
		if !strings.HasPrefix(path, "/") {
			return "", fmt.Errorf("malformed cgroup v2 entry %q in /proc/self/cgroup", line)
		}
		return path, nil
	}
	if err := s.Err(); err != nil {
		return "", fmt.Errorf("reading /proc/self/cgroup: %w", err)
	}
	return "", fmt.Errorf("no cgroup v2 entry found in /proc/self/cgroup")
}
// ToSystemdCgroupsPath converts an absolute cgroup v2 unified-mode path and
// a container ID into the "slice:prefix:name" format expected by runc's
// systemd cgroup driver.
//
// The slice is the innermost slice of the first run of ".slice" components
// in selfPath (directly nested slices are descended into, so the result names
// the slice that actually contains the caller). The prefix is derived from
// the component that follows that slice (e.g. the garden container scope),
// which keeps the resulting scope name unique per enclosing container.
//
// Fallbacks:
//   - if selfPath has no ".slice" component, the slice is "system.slice";
//   - if nothing usable follows the slice, the first non-empty path component
//     is used as the uniqueness anchor;
//   - if the path has no non-empty component at all, the prefix is just "bpm".
//
// Example:
//
//	selfPath    = "/system.slice/garden-abc.scope/monit.service"
//	containerID = "bpm-uaa"
//	result      = "system.slice:garden-abc-scope-bpm:bpm-uaa"
func ToSystemdCgroupsPath(selfPath, containerID string) string {
	parts := strings.Split(strings.TrimLeft(selfPath, "/"), "/")

	slice := "system.slice" // fallback if no .slice found
	uniquePart := ""

	for i, part := range parts {
		if !strings.HasSuffix(part, ".slice") {
			continue
		}
		// Walk down through directly nested slices so that the anchor below is
		// the first non-slice component rather than another slice name.
		last := i
		for last+1 < len(parts) && strings.HasSuffix(parts[last+1], ".slice") {
			last++
		}
		slice = parts[last]
		if last+1 < len(parts) {
			if normalized := normalizeForSystemdName(parts[last+1]); normalized != "" {
				uniquePart = normalized + "-"
			}
		}
		break
	}

	// If nothing followed the slice (or no .slice was found), use the first
	// non-empty path element as a uniqueness anchor so the scope name still
	// reflects the host context.
	if uniquePart == "" {
		for _, part := range parts {
			if normalized := normalizeForSystemdName(part); normalized != "" {
				uniquePart = normalized + "-"
				break
			}
		}
	}

	return fmt.Sprintf("%s:%sbpm:%s", slice, uniquePart, containerID)
}

// normalizeForSystemdName returns s with every character other than an ASCII
// letter, an ASCII digit, '-' or '_' replaced by a dash. The result has the
// same number of characters as s; an empty input yields an empty string.
func normalizeForSystemdName(s string) string {
	return strings.Map(func(c rune) rune {
		switch {
		case c >= 'a' && c <= 'z',
			c >= 'A' && c <= 'Z',
			c >= '0' && c <= '9',
			c == '-', c == '_':
			return c
		default:
			return '-'
		}
	}, s)
}
+ To(Equal("system.slice:garden-abc-scope-bpm:bpm-uaa")) + }) + + It("uses slice name for uniqueness when path has no intermediate scope", func() { + Expect(ToSystemdCgroupsPath("/system.slice", "bpm-uaa")). + To(Equal("system.slice:system-slice-bpm:bpm-uaa")) + }) + + It("uses first path element for uniqueness when no .slice component found", func() { + Expect(ToSystemdCgroupsPath("/garden-abc.scope", "bpm-uaa")). + To(Equal("system.slice:garden-abc-scope-bpm:bpm-uaa")) + }) + }) }) diff --git a/src/bpm/commands/root.go b/src/bpm/commands/root.go index fc7f827e..e3bc4b71 100644 --- a/src/bpm/commands/root.go +++ b/src/bpm/commands/root.go @@ -183,7 +183,7 @@ func newRuncLifecycle() (*lifecycle.RuncLifecycle, error) { return nil, fmt.Errorf("failed to fetch system features: %w", err) } - runcAdapter := adapter.NewRuncAdapter(*features, filepath.Glob, sharedvolume.MakeShared, locks) + runcAdapter := adapter.NewRuncAdapter(*features, filepath.Glob, sharedvolume.MakeShared, locks, cgroupsPathForContainer) return lifecycle.NewRuncLifecycle( runcClient, runcAdapter, @@ -204,6 +204,17 @@ func processByNameFromJobConfig(jobCfg *config.JobConfig, procName string) (*con return nil, fmt.Errorf("invalid process: %s", procName) } +func cgroupsPathForContainer(containerID string) (string, error) { + selfPath, err := cgroups.SelfCgroupPath() + if err != nil { + return "", err + } + if isRunningSystemd() { + return cgroups.ToSystemdCgroupsPath(selfPath, containerID), nil + } + return filepath.Join(selfPath, containerID), nil +} + func isRunningSystemd() bool { systemdSystemDir, err := os.Lstat("/run/systemd/system") if err != nil { diff --git a/src/bpm/runc/adapter/adapter.go b/src/bpm/runc/adapter/adapter.go index ed73c889..77dd5281 100644 --- a/src/bpm/runc/adapter/adapter.go +++ b/src/bpm/runc/adapter/adapter.go @@ -51,18 +51,20 @@ type VolumeLocker interface { } type RuncAdapter struct { - features sysfeat.Features - glob GlobFunc - shareMount MountShare - locker VolumeLocker 
+ features sysfeat.Features + glob GlobFunc + shareMount MountShare + locker VolumeLocker + cgroupsPathFor func(containerID string) (string, error) } -func NewRuncAdapter(features sysfeat.Features, glob GlobFunc, mountSharer MountShare, locker VolumeLocker) *RuncAdapter { +func NewRuncAdapter(features sysfeat.Features, glob GlobFunc, mountSharer MountShare, locker VolumeLocker, cgroupsPathFor func(containerID string) (string, error)) *RuncAdapter { return &RuncAdapter{ - features: features, - glob: glob, - shareMount: mountSharer, - locker: locker, + features: features, + glob: glob, + shareMount: mountSharer, + locker: locker, + cgroupsPathFor: cgroupsPathFor, } } @@ -298,6 +300,12 @@ func (a *RuncAdapter) BuildSpec( specbuilder.Apply(spec, specbuilder.WithPrivileged()) } + if a.cgroupsPathFor != nil { + if cgroupsPath, err := a.cgroupsPathFor(bpmCfg.ContainerID()); err == nil { + specbuilder.Apply(spec, specbuilder.WithCgroupsPath(cgroupsPath)) + } + } + return *spec, nil } diff --git a/src/bpm/runc/adapter/adapter_test.go b/src/bpm/runc/adapter/adapter_test.go index bcdbc4c3..9e40c042 100644 --- a/src/bpm/runc/adapter/adapter_test.go +++ b/src/bpm/runc/adapter/adapter_test.go @@ -55,6 +55,8 @@ var _ = Describe("RuncAdapter", func() { mountSharer *fakeMountSharer volumeLocker *fakeVolumeLocker + + cgroupsPathForFn func(containerID string) (string, error) ) BeforeEach(func() { @@ -86,6 +88,10 @@ var _ = Describe("RuncAdapter", func() { mountSharer = &fakeMountSharer{} volumeLocker = &fakeVolumeLocker{} + + cgroupsPathForFn = func(containerID string) (string, error) { + return "", fmt.Errorf("not on cgroup v2") + } }) JustBeforeEach(func() { @@ -94,7 +100,7 @@ var _ = Describe("RuncAdapter", func() { identityGlob := func(pattern string) ([]string, error) { return []string{pattern}, nil } - runcAdapter = NewRuncAdapter(features, identityGlob, mountSharer.MakeShared, volumeLocker) + runcAdapter = NewRuncAdapter(features, identityGlob, mountSharer.MakeShared, 
volumeLocker, cgroupsPathForFn) }) AfterEach(func() { @@ -889,7 +895,7 @@ var _ = Describe("RuncAdapter", func() { identityGlob := func(pattern string) ([]string, error) { return []string{pattern}, nil } - runcAdapter = NewRuncAdapter(features, identityGlob, mountSharer.MakeShared, volumeLocker) + runcAdapter = NewRuncAdapter(features, identityGlob, mountSharer.MakeShared, volumeLocker, cgroupsPathForFn) }) It("disables seccomp in the spec", func() { @@ -955,7 +961,7 @@ var _ = Describe("RuncAdapter", func() { identityGlob := func(pattern string) ([]string, error) { return []string{pattern}, nil } - runcAdapter = NewRuncAdapter(features, identityGlob, mountSharer.MakeShared, volumeLocker) + runcAdapter = NewRuncAdapter(features, identityGlob, mountSharer.MakeShared, volumeLocker, cgroupsPathForFn) }) It("includes seccomp in the spec", func() { @@ -1105,7 +1111,7 @@ var _ = Describe("RuncAdapter", func() { return []string{pattern}, nil } } - runcAdapter = NewRuncAdapter(features, fakeGlob, mountSharer.MakeShared, volumeLocker) + runcAdapter = NewRuncAdapter(features, fakeGlob, mountSharer.MakeShared, volumeLocker, cgroupsPathForFn) }) It("adds volumes for whatever the volume matches", func() { @@ -1149,7 +1155,7 @@ var _ = Describe("RuncAdapter", func() { fail := func(path string) ([]string, error) { return nil, errors.New("doomed from the start") } - runcAdapter = NewRuncAdapter(features, fail, mountSharer.MakeShared, volumeLocker) + runcAdapter = NewRuncAdapter(features, fail, mountSharer.MakeShared, volumeLocker, cgroupsPathForFn) }) It("returns an error", func() { @@ -1159,6 +1165,48 @@ var _ = Describe("RuncAdapter", func() { }) }) }) + + Context("cgroup path scoping", func() { + Context("when cgroupsPathForFn returns a path", func() { + BeforeEach(func() { + cgroupsPathForFn = func(containerID string) (string, error) { + return "/scoped/" + containerID, nil + } + }) + + It("sets CgroupsPath to the returned value", func() { + spec, err := 
runcAdapter.BuildSpec(logger, bpmCfg, procCfg, user) + Expect(err).NotTo(HaveOccurred()) + Expect(spec.Linux.CgroupsPath).To(Equal("/scoped/" + bpmCfg.ContainerID())) + }) + }) + + Context("when cgroupsPathForFn returns an error", func() { + BeforeEach(func() { + cgroupsPathForFn = func(containerID string) (string, error) { + return "", fmt.Errorf("not on cgroup v2") + } + }) + + It("leaves CgroupsPath empty", func() { + spec, err := runcAdapter.BuildSpec(logger, bpmCfg, procCfg, user) + Expect(err).NotTo(HaveOccurred()) + Expect(spec.Linux.CgroupsPath).To(BeEmpty()) + }) + }) + + Context("when cgroupsPathForFn is nil", func() { + BeforeEach(func() { + cgroupsPathForFn = nil + }) + + It("leaves CgroupsPath empty", func() { + spec, err := runcAdapter.BuildSpec(logger, bpmCfg, procCfg, user) + Expect(err).NotTo(HaveOccurred()) + Expect(spec.Linux.CgroupsPath).To(BeEmpty()) + }) + }) + }) }) }) diff --git a/src/bpm/runc/specbuilder/specbuilder.go b/src/bpm/runc/specbuilder/specbuilder.go index 74faa115..2919db8b 100644 --- a/src/bpm/runc/specbuilder/specbuilder.go +++ b/src/bpm/runc/specbuilder/specbuilder.go @@ -228,6 +228,12 @@ func WithPrivileged() SpecOption { } } +func WithCgroupsPath(path string) SpecOption { + return func(spec *specs.Spec) { + spec.Linux.CgroupsPath = path + } +} + func removeNosuidMountOption(opts []string) []string { for i := 0; i < len(opts); i++ { if opts[i] == "nosuid" { diff --git a/src/bpm/runc/specbuilder/specbuilder_test.go b/src/bpm/runc/specbuilder/specbuilder_test.go index 23f82414..c4398cbf 100644 --- a/src/bpm/runc/specbuilder/specbuilder_test.go +++ b/src/bpm/runc/specbuilder/specbuilder_test.go @@ -134,6 +134,17 @@ var _ = Describe("SpecBuilder", func() { }) }) + Describe("WithCgroupsPath", func() { + It("sets the cgroups path on the spec", func() { + spec := specbuilder.DefaultSpec() + Expect(spec.Linux.CgroupsPath).To(BeEmpty()) + + specbuilder.Apply(spec, specbuilder.WithCgroupsPath("/garden/abc-123/bpm.uaa")) + + 
Expect(spec.Linux.CgroupsPath).To(Equal("/garden/abc-123/bpm.uaa")) + }) + }) + Describe("DefaultSpec", func() { It("includes seccomp by default", func() { spec := specbuilder.DefaultSpec()