From eb65168f54e305a4bbb8dde2a5cfb90e1e7a6ec0 Mon Sep 17 00:00:00 2001 From: Noah Lev Date: Mon, 16 Feb 2026 12:01:20 -0500 Subject: [PATCH] Track hugetlb usage in memory stats Hugetlb pages are not included in standard RSS, so programs using MFD_HUGETLB memfds report artificially low memory. Enable the hugetlb cgroup controller and sum the current hugetlb usage across all page sizes into a new TSV column. Co-Authored-By: Claude Opus 4.6 --- cgroup.go | 15 +++++++++++---- main.go | 4 ++-- poller.go | 9 +++++---- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/cgroup.go b/cgroup.go index 1499f85..b79247e 100644 --- a/cgroup.go +++ b/cgroup.go @@ -15,8 +15,9 @@ import ( // MemoryStat holds memory statistics from the cgroup type MemoryStat struct { - Usage uint64 - Kernel uint64 // Separate kernel memory tracking (v1 only; in v2 it's included in Usage) + Usage uint64 + Kernel uint64 // Separate kernel memory tracking (v1 only; in v2 it's included in Usage) + Hugetlb uint64 // Hugetlb usage (not included in standard RSS) } // CgroupManager provides an abstraction over cgroups v1 and v2 @@ -111,6 +112,7 @@ func newV2Manager(path string, memLimit int64) (*cgroupV2Manager, error) { Memory: &cgroup2.Memory{ Max: &memLimit, }, + HugeTlb: &cgroup2.HugeTlb{}, } // Remove leading slash if present for v2 @@ -142,9 +144,14 @@ func (m *cgroupV2Manager) Stat() (*MemoryStat, error) { // In cgroups v2, kernel memory is included in the total usage and cannot // be queried separately. The separate kmem controller was removed in v2. 
// See: https://github.com/opencontainers/runtime-spec/issues/1005 + var hugetlb uint64 + for _, h := range stats.Hugetlb { + hugetlb += h.Current + } return &MemoryStat{ - Usage: stats.Memory.Usage, - Kernel: 0, // Not separately tracked in v2 (included in Usage) + Usage: stats.Memory.Usage, + Kernel: 0, // Not separately tracked in v2 (included in Usage) + Hugetlb: hugetlb, }, nil } diff --git a/main.go b/main.go index 3e1b352..89619a2 100644 --- a/main.go +++ b/main.go @@ -228,14 +228,14 @@ func main() { var buf bytes.Buffer bio := bufio.NewWriter(&buf) - if _, err := bio.WriteString("time\trss\tkernel\n"); err != nil { + if _, err := bio.WriteString("time\trss\tkernel\thugetlb\n"); err != nil { log.Fatalf("bio.WriteString: %s", err) } start := stats.Rss[0].Time.UnixNano() for i := 0; i < len(stats.Rss); i++ { r := stats.Rss[i] - line := fmt.Sprintf("%d\t%d\t%d\n", r.Time.UnixNano()-start, r.Value, r.Kernel) + line := fmt.Sprintf("%d\t%d\t%d\t%d\n", r.Time.UnixNano()-start, r.Value, r.Kernel, r.Hugetlb) if _, err := bio.WriteString(line); err != nil { log.Fatalf("bufio.WriteString: %s", err) } diff --git a/poller.go b/poller.go index e6dd04c..f3bea79 100644 --- a/poller.go +++ b/poller.go @@ -11,9 +11,10 @@ import ( ) type Record struct { - Time time.Time - Value uint64 - Kernel uint64 + Time time.Time + Value uint64 + Kernel uint64 + Hugetlb uint64 } type Stats struct { @@ -61,7 +62,7 @@ func (p *Poller) poll(t time.Time, cgroup CgroupManager) error { return fmt.Errorf("cgroup.Stat: %w", err) } - p.stats.Rss = append(p.stats.Rss, Record{t, stats.Usage, stats.Kernel}) + p.stats.Rss = append(p.stats.Rss, Record{t, stats.Usage, stats.Kernel, stats.Hugetlb}) return nil }