Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions mantle/kola/harness.go
Original file line number Diff line number Diff line change
Expand Up @@ -1747,7 +1747,7 @@ func makeNonExclusiveTest(bucket int, tests []*register.Test, flight platform.Fl
return nonExclusiveWrapper
}

func reserveMemoryCountForTest(t *register.Test, needed int, logger func(format string, args ...interface{})) bool {
func reserveMemoryCountForTest(t *register.Test, needed int, warnOnWait bool) bool {
reservedMemoryCountMutex.Lock()
defer reservedMemoryCountMutex.Unlock()
avail, err := system.GetCurrentMemAvailableMiB()
Expand All @@ -1762,10 +1762,14 @@ func reserveMemoryCountForTest(t *register.Test, needed int, logger func(format
reservedMemoryCountMiB += needed
reserved := reservedMemoryCountMiB
t.ReservedMemoryCountMiB = needed
logger("Reserved %d MiB for %s (available: %d MiB, reserved total: %d MiB)",
plog.Debugf("Reserved %d MiB for %s (available: %d MiB, reserved total: %d MiB)",
needed, t.Name, avail, reserved)
return true
}
logger := plog.Debugf
if warnOnWait {
logger = plog.Warningf
}
logger("Waiting on memory to run %s: need %d MiB, effective available %d MiB (system: %d MiB, reserved: %d MiB)",
t.Name, needed, effective, avail, reservedMemoryCountMiB)
return false
Expand All @@ -1785,15 +1789,15 @@ func waitForMemory(h *harness.H, flight platform.Flight, t *register.Test) {
if flight.Platform() == "qemu" {
needed := getNeededMemoryMiB(t)
start := time.Now()
logger := plog.Debugf
for !reserveMemoryCountForTest(t, needed, logger) {
// After a period of time switch the logger so we get some
warnOnWait := true // warn on first wait
for !reserveMemoryCountForTest(t, needed, warnOnWait) {
// After a period of time switch to log a warning so we get some
// info even if debug isn't turned on.
if time.Since(start) > 5*time.Minute {
logger = plog.Warningf
warnOnWait = true
start = time.Now() // reset counter
} else {
logger = plog.Debugf
warnOnWait = false
}
// sleep between 0 and 20 seconds and try again
time.Sleep(time.Duration(rand.Intn(20)) * time.Second)
Expand Down
63 changes: 60 additions & 3 deletions mantle/system/nproc.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,15 @@ func getCgroupMemoryLimitMiB() (uint, error) {
}

// getCgroupMemoryAvailableMiB returns the available memory within the
// cgroup v2 in MiB (limit - current usage), or math.MaxUint if no limit.
// cgroup v2 in MiB, or math.MaxUint if no limit is set. It computes
// available memory as: limit - (current - inactive_file), where inactive_file
// is file cache that is not actively in use and can be evicted if needed.
// (current - inactive_file) is similar to the "workingSet" calculation in [1];
// more context is available in [2]. This is similar to how /proc/meminfo
// computes MemAvailable by taking reclaimable caches into account.
//
// [1] https://github.com/kubernetes/kubernetes/blob/ac10370ad2aebde82c2d268dd80d08df0ffc2532/test/e2e/node/node_problem_detector.go#L290-L344
// [2] https://github.com/kata-containers/kata-containers/issues/10280
func getCgroupMemoryAvailableMiB() (uint, error) {
maxBuf, err := os.ReadFile("/sys/fs/cgroup/memory.max")
if os.IsNotExist(err) {
Expand All @@ -200,8 +208,57 @@ func getCgroupMemoryAvailableMiB() (uint, error) {
if err != nil {
return 0, fmt.Errorf("invalid memory.current value: %w", err)
}
if current >= limit {

// Read inactive_file size from memory.stat to exclude reclaimable
// file-backed memory from the usage calculation.
inactiveFile, err := getCgroupMemoryStatField("inactive_file")
if err != nil {
return 0, err
}

// Subtract the inactive_file size from the memory.current. This
// cache should always be less than the memory.current but add
// a check and do nothing just in case.
usage := current
if inactiveFile < usage {
usage -= inactiveFile
}

// This also shouldn't happen, but in case the usage is larger
// than the limit let's just return that there's 0 available memory.
if usage >= limit {
return 0, nil
}
return uint((limit - usage) / (1024 * 1024)), nil
}

// getCgroupMemoryStatField looks up a single field in
// /sys/fs/cgroup/memory.stat and returns its numeric value.
//
// The file is read line by line; each line is expected to be a
// whitespace-separated "key value" pair such as "file 123456789". Only
// lines with exactly two fields are considered, and the first line whose
// key equals field wins.
//
// It returns (0, nil) when the file does not exist or when no line matches
// field, so callers degrade gracefully on hosts without that file or
// entry. An error is returned if the file cannot be opened for any other
// reason, if scanning fails, or if the matched value is not a base-10
// unsigned 64-bit integer.
func getCgroupMemoryStatField(field string) (uint64, error) {
	f, err := os.Open("/sys/fs/cgroup/memory.stat")
	if os.IsNotExist(err) {
		// A missing file is not an error: report zero.
		return 0, nil
	} else if err != nil {
		return 0, fmt.Errorf("reading memory.stat: %w", err)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		parts := strings.Fields(scanner.Text())
		if len(parts) != 2 || parts[0] != field {
			continue
		}
		val, err := strconv.ParseUint(parts[1], 10, 64)
		if err != nil {
			return 0, fmt.Errorf("parsing memory.stat field %s: %w", field, err)
		}
		return val, nil
	}
	if err := scanner.Err(); err != nil {
		return 0, fmt.Errorf("scanning memory.stat: %w", err)
	}
	// Field not found; return 0 so callers degrade gracefully.
	return 0, nil
}