From 5c995113c20ca909abf5551d9ab987cf4f1cbf78 Mon Sep 17 00:00:00 2001
From: Chloe Crozier <chloecrozier@gmail.com>
Date: Thu, 4 Jun 2026 03:37:53 +0000
Subject: [PATCH 1/2] #54 - Add RTX PRO 6000 support

Updates python/tune_system.py so the existing IGX / DGX Spark detection
paths have a discrete-Blackwell sibling, while keeping every user-facing
message hardware-agnostic.

  - check_peermem_kernel: when /dev/dma_heap/system is present, replaces
    the misleading "load nvidia-peermem" warning with a hardware-agnostic
    INFO that points at the patched-DPDK dma-buf path and tells stock-DPDK
    users to load nvidia-peermem. Falls back to the original WARN when
    /dev/dma_heap/system is absent. No GPU-type gate.
  - check_bar1_size: per-GPU 32 GiB Blackwell-class threshold via
    _gpu_name_by_bdf(), so heterogeneous boxes only get the Blackwell
    rule on the Blackwell card. The user-visible message includes the
    actual nvidia-smi product name rather than a hard-coded SKU string.
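  - check_cpu_governor: aggregates per-CPU output into one summary line
    per distinct governor value (plus one line each for missing or
    unreadable scaling_governor files), so a 256-core system is not
    buried in 256 identical log lines.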
  - get_nic_info: returns [] consistently on error paths (was returning
    ([], []) which crashed callers); cached via lru_cache so --check all
    runs ibdev2netdev once and emits the missing-tool warning at most once.

Validated on a 5x RTX PRO 6000 Blackwell SE / 256-core EPYC dev box:
--check peermem produces the new generic INFO, BAR1 verified at 128 GiB
per card, cpu-freq summarizes 256 cores in one line, and the
ibdev2netdev-missing path emits a single WARNING.

Signed-off-by: Chloe Crozier <chloecrozier@gmail.com>
---
 python/tune_system.py | 131 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 110 insertions(+), 21 deletions(-)

diff --git a/python/tune_system.py b/python/tune_system.py
index bb571c3..0f3eef9 100755
--- a/python/tune_system.py
+++ b/python/tune_system.py
@@ -16,6 +16,7 @@
 # limitations under the License.
 
 import argparse
+import functools
 import html
 import logging
 import os
@@ -193,6 +194,40 @@ def is_any_integrated_gpu():
     return False
 
 
+def _dmabuf_gpu_path_available():
+    """
+    Returns True if the kernel exposes the dma-buf path that recent NVIDIA drivers
+    use for GPUDirect in place of nvidia-peermem. The patched DPDK shipped with
+    this repo (dpdk_patches/dmabuf.patch) takes this path on platforms that
+    expose it, which is why peermem is not required there.
+    """
+    return os.path.exists("/dev/dma_heap/system")
+
+
+def _gpu_name_by_bdf():
+    """
+    Returns {pci_bdf: product_name} for every visible NVIDIA GPU, or {} if
+    nvidia-smi is unavailable. Used by per-GPU checks (e.g. check_bar1_size)
+    to apply Blackwell-specific thresholds only to the Blackwell GPU(s) in a
+    heterogeneous system rather than to every GPU in the box.
+    """
+    try:
+        result = subprocess.run(
+            ["nvidia-smi", "--query-gpu=pci.bus_id,name", "--format=csv,noheader"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+    except (FileNotFoundError, subprocess.CalledProcessError):
+        return {}
+    names = {}
+    for line in result.stdout.splitlines():
+        parts = line.split(",", 1)
+        if len(parts) == 2:
+            names[parts[0].strip()] = parts[1].strip()
+    return names
+
+
 def check_peermem_kernel():
     """
     Check if the nvidia-peermem module for GPUDirect is loaded in the kernel.
@@ -214,6 +249,14 @@ def check_peermem_kernel():
                 "(e.g. GB10 / DGX Spark) where peermem does not apply. Use kind: host_pinned "
                 "in the daqiri YAML for GPUDirect on this platform."
             )
+        elif _dmabuf_gpu_path_available():
+            logging.info(
+                "nvidia-peermem module is not loaded, but /dev/dma_heap/system is "
+                "available. The patched DPDK shipped with this repo "
+                "(dpdk_patches/dmabuf.patch) takes the dma-buf GPUDirect path on "
+                "platforms that expose it and does not need peermem. If you are "
+                "building DAQIRI against stock DPDK, load nvidia-peermem."
+            )
         else:
             logging.warning("nvidia-peermem module is not loaded. GPUDirect may not work.")
 
@@ -264,11 +307,16 @@ def check_gpudirect_support():
             logging.warning(f"GPU {i}: {name.value.decode()} does not have GPUDirect support.")
 
 
+@functools.lru_cache(maxsize=1)
 def get_nic_info():
     """
     Parses the output of `ibdev2netdev -v` to extract and return a list of tuples,
     where each tuple contains the interface name and its PCIe address.
 
+    Cached with lru_cache so --check all (which calls this from check_mrrs,
+    check_max_payload_size, and check_mtu_size) only invokes ibdev2netdev once
+    and only emits the "ibdev2netdev not found" warning once per run.
+
     Returns:
         List[Tuple[str, str]]: A list of tuples containing the IF name and PCIe address
     """
@@ -288,16 +336,17 @@ def get_nic_info():
         return vals
 
     except FileNotFoundError:
-        print(
-            "The ibdev2netdev command is not found. Ensure that it is installed and available in your PATH."
+        logging.warning(
+            "The ibdev2netdev command is not found (try: apt install infiniband-diags). "
+            "Skipping NIC-dependent checks (mrrs, mps, mtu)."
         )
-        return [], []
+        return []
     except subprocess.CalledProcessError as e:
-        print(f"Error while executing ibdev2netdev: {e}")
-        return [], []
+        logging.error(f"Error while executing ibdev2netdev: {e}")
+        return []
     except Exception as e:
-        print(f"An unexpected error occurred: {e}")
-        return [], []
+        logging.error(f"Unexpected error while running ibdev2netdev: {e}")
+        return []
 
 
 def get_online_cpus():
@@ -328,30 +377,49 @@ def get_online_cpus():
 def check_cpu_governor():
     """
     Checks if the CPU frequency governor is set to 'performance' for all online CPUs.
+    Output is bucketed by result so a 256-core system does not emit 256 lines when
+    every CPU is in the same state. Per-CPU detail still surfaces if results vary.
     """
     online_cpus = get_online_cpus()
+    total = len(online_cpus)
+
+    by_governor = defaultdict(list)
+    missing = []
+    permission_denied = []
 
     for cpu in online_cpus:
         scaling_governor_path = f"/sys/devices/system/cpu/cpu{cpu}/cpufreq/scaling_governor"
-
         try:
             with open(scaling_governor_path, "r") as f:
-                governor = f.read().strip()
-
-            if governor == "performance":
-                logging.info(f"CPU {cpu}: Governor is correctly set to 'performance'.")
-            else:
-                logging.warning(f"CPU {cpu}: Governor is set to '{governor}', not 'performance'.")
-
+                by_governor[f.read().strip()].append(cpu)
         except FileNotFoundError:
-            logging.error(
-                f"CPU {cpu}: Scaling governor file not found. This CPU may not support frequency scaling."
-            )
+            missing.append(cpu)
         except PermissionError:
-            logging.error(
-                f"CPU {cpu}: Permission denied while accessing scaling governor file. Run as root."
+            permission_denied.append(cpu)
+
+    for governor, cpus in sorted(by_governor.items()):
+        if governor == "performance":
+            logging.info(
+                f"CPU governor: {len(cpus)}/{total} online CPUs set to 'performance'."
+            )
+        else:
+            logging.warning(
+                f"CPU governor: {len(cpus)}/{total} online CPUs set to '{governor}', "
+                "expected 'performance'."
             )
 
+    if missing:
+        logging.error(
+            f"CPU governor: scaling_governor file not found on {len(missing)}/{total} "
+            "online CPUs. The cpufreq driver may not be loaded (e.g. amd-pstate, "
+            "intel_pstate, or cppc_cpufreq). Performance scaling cannot be checked."
+        )
+    if permission_denied:
+        logging.error(
+            f"CPU governor: permission denied reading scaling_governor on "
+            f"{len(permission_denied)}/{total} online CPUs. Run as root."
+        )
+
 
 def check_mrrs():
     """
@@ -610,6 +678,17 @@ def check_bar1_size():
             "There is no resizable BAR1 to enlarge on platforms like GB10 / DGX Spark."
         )
         return
+    # On RTX PRO 6000 Blackwell Server Edition (96 GB GDDR7) the generic
+    # > 1024 MiB threshold passes trivially even with Resizable BAR disabled
+    # (the card still exposes a multi-GiB BAR1 in some platform configs).
+    # 32 GiB is the conservative "rebar is fully unlocked" floor: well below
+    # the 96 GB card capacity but high enough that any platform exposing less
+    # is almost certainly missing Resizable BAR / Above 4G Decoding in BIOS.
+    # The threshold is applied per-GPU via gpu_names below so heterogeneous
+    # boxes (e.g. RTX PRO 6000 + H100) only get the Blackwell rule on the
+    # Blackwell card.
+    BAR1_BLACKWELL_MIN_MIB = 32768  # 32 GiB
+    gpu_names = _gpu_name_by_bdf()
     try:
         # Run nvidia-smi to get BAR1 memory information
         result = subprocess.run(
@@ -640,7 +719,17 @@ def check_bar1_size():
 
             # Once BAR1 size is found, log it
             if current_gpu is not None and bar1_total is not None:
-                if bar1_total > 1024:
+                gpu_name = gpu_names.get(current_gpu, "")
+                gpu_is_blackwell = "Blackwell Server Edition" in gpu_name
+                if gpu_is_blackwell and bar1_total < BAR1_BLACKWELL_MIN_MIB:
+                    logging.warning(
+                        f"GPU {current_gpu} ({gpu_name}): BAR1 size is {bar1_total} MiB. "
+                        f"Expected at least {BAR1_BLACKWELL_MIN_MIB} MiB "
+                        f"({BAR1_BLACKWELL_MIN_MIB // 1024} GiB) with Resizable BAR fully "
+                        "enabled. Check the system BIOS for the Resizable BAR / Above 4G "
+                        "Decoding settings."
+                    )
+                elif bar1_total > 1024:
                     logging.info(f"GPU {current_gpu}: BAR1 size is {bar1_total} MiB.")
                 else:
                     logging.warning(

From a29fecb962b624ad4d0d40a3f52220358ded90ef Mon Sep 17 00:00:00 2001
From: Chloe Crozier <chloecrozier@gmail.com>
Date: Fri, 5 Jun 2026 15:46:50 -0700
Subject: [PATCH 2/2] #54 - Address tune_system.py review feedback

Move BAR1 constant to module scope, fix CPU governor docstring,
and reuse get_nvidia_gpu_info_by_bdf for BAR1 Blackwell detection.

Signed-off-by: Chloe Crozier <chloecrozier@gmail.com>
---
 python/tune_system.py | 38 ++++++++------------------------------
 1 file changed, 8 insertions(+), 30 deletions(-)

diff --git a/python/tune_system.py b/python/tune_system.py
index 0f3eef9..f467f82 100755
--- a/python/tune_system.py
+++ b/python/tune_system.py
@@ -44,6 +44,7 @@
     32.0: 3.938,
     64.0: 7.563,
 }
+BAR1_BLACKWELL_MIN_MIB = 32768  # 32 GiB
 
 
 @dataclass
@@ -204,30 +205,6 @@ def _dmabuf_gpu_path_available():
     return os.path.exists("/dev/dma_heap/system")
 
 
-def _gpu_name_by_bdf():
-    """
-    Returns {pci_bdf: product_name} for every visible NVIDIA GPU, or {} if
-    nvidia-smi is unavailable. Used by per-GPU checks (e.g. check_bar1_size)
-    to apply Blackwell-specific thresholds only to the Blackwell GPU(s) in a
-    heterogeneous system rather than to every GPU in the box.
-    """
-    try:
-        result = subprocess.run(
-            ["nvidia-smi", "--query-gpu=pci.bus_id,name", "--format=csv,noheader"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-    except (FileNotFoundError, subprocess.CalledProcessError):
-        return {}
-    names = {}
-    for line in result.stdout.splitlines():
-        parts = line.split(",", 1)
-        if len(parts) == 2:
-            names[parts[0].strip()] = parts[1].strip()
-    return names
-
-
 def check_peermem_kernel():
     """
     Check if the nvidia-peermem module for GPUDirect is loaded in the kernel.
@@ -377,8 +354,9 @@ def get_online_cpus():
 def check_cpu_governor():
     """
     Checks if the CPU frequency governor is set to 'performance' for all online CPUs.
-    Output is bucketed by result so a 256-core system does not emit 256 lines when
-    every CPU is in the same state. Per-CPU detail still surfaces if results vary.
+    Aggregates results by governor value and logs one summary line per distinct
+    governor (e.g. 256/256 online CPUs set to 'performance'), plus separate counts
+    for CPUs whose scaling_governor file is missing or unreadable.
     """
     online_cpus = get_online_cpus()
     total = len(online_cpus)
@@ -684,11 +662,10 @@ def check_bar1_size():
     # 32 GiB is the conservative "rebar is fully unlocked" floor: well below
     # the 96 GB card capacity but high enough that any platform exposing less
     # is almost certainly missing Resizable BAR / Above 4G Decoding in BIOS.
-    # The threshold is applied per-GPU via gpu_names below so heterogeneous
+    # The threshold is applied per-GPU via gpu_info_by_bdf below so heterogeneous
     # boxes (e.g. RTX PRO 6000 + H100) only get the Blackwell rule on the
     # Blackwell card.
-    BAR1_BLACKWELL_MIN_MIB = 32768  # 32 GiB
-    gpu_names = _gpu_name_by_bdf()
+    gpu_info_by_bdf = get_nvidia_gpu_info_by_bdf()
     try:
         # Run nvidia-smi to get BAR1 memory information
         result = subprocess.run(
@@ -719,7 +696,8 @@ def check_bar1_size():
 
             # Once BAR1 size is found, log it
             if current_gpu is not None and bar1_total is not None:
-                gpu_name = gpu_names.get(current_gpu, "")
+                gpu_bdf = normalize_pci_address(current_gpu) or current_gpu
+                gpu_name = gpu_info_by_bdf.get(gpu_bdf, {}).get("name", "")
                 gpu_is_blackwell = "Blackwell Server Edition" in gpu_name
                 if gpu_is_blackwell and bar1_total < BAR1_BLACKWELL_MIN_MIB:
                     logging.warning(