diff --git a/mantle/kola/harness.go b/mantle/kola/harness.go index 9cd47106a0..ca94c0c302 100644 --- a/mantle/kola/harness.go +++ b/mantle/kola/harness.go @@ -1747,7 +1747,7 @@ func makeNonExclusiveTest(bucket int, tests []*register.Test, flight platform.Fl return nonExclusiveWrapper } -func reserveMemoryCountForTest(t *register.Test, needed int, logger func(format string, args ...interface{})) bool { +func reserveMemoryCountForTest(t *register.Test, needed int, warnOnWait bool) bool { reservedMemoryCountMutex.Lock() defer reservedMemoryCountMutex.Unlock() avail, err := system.GetCurrentMemAvailableMiB() @@ -1762,10 +1762,14 @@ func reserveMemoryCountForTest(t *register.Test, needed int, logger func(format reservedMemoryCountMiB += needed reserved := reservedMemoryCountMiB t.ReservedMemoryCountMiB = needed - logger("Reserved %d MiB for %s (available: %d MiB, reserved total: %d MiB)", + plog.Debugf("Reserved %d MiB for %s (available: %d MiB, reserved total: %d MiB)", needed, t.Name, avail, reserved) return true } + logger := plog.Debugf + if warnOnWait { + logger = plog.Warningf + } logger("Waiting on memory to run %s: need %d MiB, effective available %d MiB (system: %d MiB, reserved: %d MiB)", t.Name, needed, effective, avail, reservedMemoryCountMiB) return false @@ -1785,15 +1789,15 @@ func waitForMemory(h *harness.H, flight platform.Flight, t *register.Test) { if flight.Platform() == "qemu" { needed := getNeededMemoryMiB(t) start := time.Now() - logger := plog.Debugf - for !reserveMemoryCountForTest(t, needed, logger) { - // After a period of time switch the logger so we get some + warnOnWait := true // warn on first wait + for !reserveMemoryCountForTest(t, needed, warnOnWait) { + // After a period of time switch to log a warning so we get some // info even if debug isn't turned on. 
if time.Since(start) > 5*time.Minute { - logger = plog.Warningf + warnOnWait = true start = time.Now() // reset counter } else { - logger = plog.Debugf + warnOnWait = false } // sleep between 0 and 20 seconds and try again time.Sleep(time.Duration(rand.Intn(20)) * time.Second) diff --git a/mantle/system/nproc.go b/mantle/system/nproc.go index a91f7d5224..ee5561ae7c 100644 --- a/mantle/system/nproc.go +++ b/mantle/system/nproc.go @@ -176,7 +176,15 @@ func getCgroupMemoryLimitMiB() (uint, error) { } // getCgroupMemoryAvailableMiB returns the available memory within the -// cgroup v2 in MiB (limit - current usage), or math.MaxUint if no limit. +// cgroup v2 in MiB, or math.MaxUint if no limit is set. It computes +// available memory as: limit - (current - inactive_file) where inactive_file +// is not actively used file caches that can be evicted if needed. +// (current - inactive_file) is similar to the "workingSet" calculation over in [1]. +// More context on this also in [2]. This is similar to how /proc/meminfo computes +// MemAvailable by considering reclaimable caches. +// +// [1] https://github.com/kubernetes/kubernetes/blob/ac10370ad2aebde82c2d268dd80d08df0ffc2532/test/e2e/node/node_problem_detector.go#L290-L344 +// [2] https://github.com/kata-containers/kata-containers/issues/10280 func getCgroupMemoryAvailableMiB() (uint, error) { maxBuf, err := os.ReadFile("/sys/fs/cgroup/memory.max") if os.IsNotExist(err) { @@ -200,8 +208,57 @@ func getCgroupMemoryAvailableMiB() (uint, error) { if err != nil { return 0, fmt.Errorf("invalid memory.current value: %w", err) } - if current >= limit { + + // Read inactive_file size from memory.stat to exclude reclaimable + // file-backed memory from the usage calculation. + inactiveFile, err := getCgroupMemoryStatField("inactive_file") + if err != nil { + return 0, err + } + + // Subtract the inactive_file size from the memory.current. 
// cgroupMemoryStatPath is the file that getCgroupMemoryStatField parses.
const cgroupMemoryStatPath = "/sys/fs/cgroup/memory.stat"

// getCgroupMemoryStatField looks up a single entry in
// /sys/fs/cgroup/memory.stat and returns its value as an unsigned integer.
//
// The file is read as whitespace-separated "key value" lines (for example
// "inactive_file 123456789"); the first line whose key equals field wins.
//
// It returns (0, nil) when the file does not exist or when no line matches
// field, so callers can treat a missing statistic as "nothing to subtract".
// It returns a non-nil error when the file cannot be opened for any other
// reason, when scanning fails, or when the matching value is not a valid
// base-10 uint64.
func getCgroupMemoryStatField(field string) (uint64, error) {
	return readMemoryStatField(cgroupMemoryStatPath, field)
}

// readMemoryStatField implements getCgroupMemoryStatField for an arbitrary
// memory.stat-formatted file at path, which lets the parsing be exercised
// against a fixture file.
func readMemoryStatField(path, field string) (uint64, error) {
	f, err := os.Open(path)
	if os.IsNotExist(err) {
		// A missing file is not an error: report the field as zero.
		return 0, nil
	}
	if err != nil {
		return 0, fmt.Errorf("reading memory.stat: %w", err)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		parts := strings.Fields(scanner.Text())
		// Skip lines that are not exactly "key value" or that carry a
		// different key.
		if len(parts) != 2 || parts[0] != field {
			continue
		}
		val, err := strconv.ParseUint(parts[1], 10, 64)
		if err != nil {
			return 0, fmt.Errorf("parsing memory.stat field %s: %w", field, err)
		}
		return val, nil
	}
	if err := scanner.Err(); err != nil {
		return 0, fmt.Errorf("scanning memory.stat: %w", err)
	}
	// Field not found; return 0 so callers degrade gracefully.
	return 0, nil
}