From ed447b803591383b8aff2f800e7c85a57da0eda2 Mon Sep 17 00:00:00 2001
From: AnilAltinay <aaltinay@google.com>
Date: Thu, 7 May 2026 10:48:26 -0700
Subject: [PATCH] Add debugging output when only one GPU is detected in CUDA
 tests.

When cudaGetDeviceCount reports exactly one GPU, print the
CUDA_VISIBLE_DEVICES and NVIDIA_VISIBLE_DEVICES environment variables,
list /dev/nvidia*, and run nvidia-smi -L.
This should help diagnose cases where not all expected GPUs are visible
to the CUDA application. The failure cannot currently be reproduced
locally.

PiperOrigin-RevId: 912043151
---
 images/gpu/cuda-tests/list_features.cu | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/images/gpu/cuda-tests/list_features.cu b/images/gpu/cuda-tests/list_features.cu
index 6f95cf7f1a..e57de32375 100644
--- a/images/gpu/cuda-tests/list_features.cu
+++ b/images/gpu/cuda-tests/list_features.cu
@@ -20,6 +20,7 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <stdio.h>
+#include <stdlib.h>
 
 #include "cuda_test_util.h"  // NOLINT(build/include)
 
@@ -53,6 +54,22 @@ int main(int argc, char *argv[]) {
   int gpuCount = -1;
   CHECK_CUDA(cudaGetDeviceCount(&gpuCount));
   printf("// Number of GPUs: %d\n", gpuCount);
+  if (gpuCount == 1) {
+    printf("// Warning: Only 1 GPU detected by cudaGetDeviceCount.\n");
+    printf("// Debugging info:\n");
+    const char* cuda_visible_devices = getenv("CUDA_VISIBLE_DEVICES");
+    printf("// CUDA_VISIBLE_DEVICES: %s\n",
+           cuda_visible_devices ? cuda_visible_devices : "unset");
+    const char* nvidia_visible_devices = getenv("NVIDIA_VISIBLE_DEVICES");
+    printf("// NVIDIA_VISIBLE_DEVICES: %s\n",
+           nvidia_visible_devices ? nvidia_visible_devices : "unset");
+    printf("// Device nodes:\n");
+    fflush(stdout);
+    system("ls -l /dev/nvidia* 2>&1 | sed 's/^/\\/\\/ /'");
+    printf("// nvidia-smi -L output:\n");
+    fflush(stdout);
+    system("nvidia-smi -L 2>&1 | sed 's/^/\\/\\/ /'");
+  }
   if (gpuCount >= 2) {
     int canAccessAToB = -1;
     CHECK_CUDA(cudaDeviceCanAccessPeer(&canAccessAToB, 0, 1));