Skip to content

Commit 78ade5c

Browse files
committed
Merge branch 'main' into ci_cuda_bindings_examples
2 parents 7fa3f76 + 5dd4ac9 commit 78ade5c

19 files changed

Lines changed: 482 additions & 248 deletions

File tree

.gitattributes

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ cuda/_version.py export-subst
66
# we do not own any headers checked in, don't touch them
77
*.h binary
88
*.hpp binary
9-
# Exception: headers we own (cuda_core C++ implementation)
9+
# Exception: headers we own
10+
cuda_bindings/cuda/bindings/_bindings/*.h -binary text diff
11+
cuda_bindings/cuda/bindings/_lib/*.h -binary text diff
1012
cuda_core/cuda/core/_cpp/*.h -binary text diff
1113
cuda_core/cuda/core/_cpp/*.hpp -binary text diff
1214
# git should not convert line endings in PNG files

.pre-commit-config.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

@@ -30,6 +30,7 @@ repos:
3030
additional_dependencies:
3131
- https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl
3232
exclude: '.*pixi\.lock'
33+
args: ["--fix"]
3334

3435
- id: no-markdown-in-docs-source
3536
name: Prevent markdown files in docs/source directories
@@ -89,5 +90,6 @@ repos:
8990
args: [--no-pycodestyle]
9091
exclude: ^cuda_bindings/
9192

93+
9294
default_language_version:
9395
python: python3
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
3+
4+
5+
# This package contains test helper utilities that may also be useful for other libraries outside of `cuda.bindings`,
6+
# such as `cuda.core`. These utilities are not part of the public API of `cuda.bindings` and may change without notice.
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
3+
4+
5+
from contextlib import contextmanager
6+
7+
import pytest
8+
from cuda.bindings import _nvml as nvml
9+
10+
11+
@contextmanager
def unsupported_before(device: int, expected_device_arch: nvml.DeviceArch | str | None):
    """Wrap an NVML call that may be unsupported on the given device.

    The wrapped body runs exactly once. Depending on the device's architecture
    (as reported by ``nvml.device_get_architecture``) and on
    ``expected_device_arch``, one of three policies is applied:

    * Support is unknown (``expected_device_arch`` is ``None`` or
      ``"HAS_INFOROM"``, or the device architecture is ``UNKNOWN``): the body
      may succeed or raise ``nvml.NotSupportedError``; in the latter case the
      calling test is skipped rather than failed.
    * The device is older than ``expected_device_arch``: the body *must* raise
      ``nvml.NotSupportedError``, after which the calling test is skipped.
    * Otherwise: the body is expected to work and any exception propagates.

    Parameters
    ----------
    device
        Device passed through to ``nvml.device_get_architecture`` and
        ``nvml.device_get_name``.
    expected_device_arch
        Oldest architecture on which the wrapped call is expected to work.
        May be an ``nvml.DeviceArch`` member, the string ``"FERMI"``, the
        string ``"HAS_INFOROM"``, or ``None`` when support cannot be predicted.
    """
    device_arch = nvml.device_get_architecture(device)

    if isinstance(expected_device_arch, nvml.DeviceArch):
        expected_device_arch_int = int(expected_device_arch)
    elif expected_device_arch == "FERMI":
        # NOTE(review): presumably DeviceArch has no FERMI member and 1 sorts
        # below every real member -- confirm against the NVML enum.
        expected_device_arch_int = 1
    else:
        expected_device_arch_int = 0

    if expected_device_arch is None or expected_device_arch == "HAS_INFOROM" or device_arch == nvml.DeviceArch.UNKNOWN:
        # In this case, we don't /know/ if it will fail, but we are ok if it
        # does or does not.

        # TODO: There are APIs that are documented as supported only if the
        # device has an InfoROM, but I couldn't find a way to detect that. For
        # now, they are just handled as "possibly failing".

        try:
            yield
        except nvml.NotSupportedError:
            # The API call raised NotSupportedError, so we skip the test, but
            # don't fail it
            pytest.skip(
                f"Unsupported call for device architecture {nvml.DeviceArch(device_arch).name} "
                f"on device '{nvml.device_get_name(device)}'"
            )
        # If the API call worked, just continue
    elif int(device_arch) < expected_device_arch_int:
        # In this case, we /know/ it will fail, and we want to assert that it does.
        with pytest.raises(nvml.NotSupportedError):
            yield
        # The above call was unsupported, so the rest of the test is skipped.
        # `expected_device_arch` may be a plain string (e.g. "FERMI"), which has
        # no `.name` attribute, so fall back to the value itself for the message.
        expected_name = getattr(expected_device_arch, "name", expected_device_arch)
        pytest.skip(f"Unsupported before {expected_name}, got {nvml.device_get_name(device)}")
    else:
        # In this case, we /know/ it should work, and if it fails, the test should fail.
        yield

cuda_bindings/tests/nvml/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import pytest
77
from cuda.bindings import _nvml as nvml
8+
from cuda.bindings._test_helpers.arch_check import unsupported_before # noqa: F401
89

910

1011
class NVMLInitializer:

cuda_bindings/tests/nvml/test_compute_mode.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import pytest
88
from cuda.bindings import _nvml as nvml
99

10+
from .conftest import unsupported_before
11+
1012
COMPUTE_MODES = [
1113
nvml.ComputeMode.COMPUTEMODE_DEFAULT,
1214
nvml.ComputeMode.COMPUTEMODE_PROHIBITED,
@@ -16,18 +18,11 @@
1618

1719
@pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
1820
def test_compute_mode_supported_nonroot(all_devices):
19-
skip_reasons = set()
2021
for device in all_devices:
21-
try:
22+
with unsupported_before(device, None):
2223
original_compute_mode = nvml.device_get_compute_mode(device)
23-
except nvml.NotSupportedError:
24-
skip_reasons.add(f"nvmlDeviceGetComputeMode not supported for device {device}")
25-
continue
2624

2725
for cm in COMPUTE_MODES:
2826
with pytest.raises(nvml.NoPermissionError):
2927
nvml.device_set_compute_mode(device, cm)
3028
assert original_compute_mode == nvml.device_get_compute_mode(device), "Compute mode shouldn't have changed"
31-
32-
if skip_reasons:
33-
pytest.skip(" ; ".join(skip_reasons))

cuda_bindings/tests/nvml/test_gpu.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from cuda.bindings import _nvml as nvml
66

77
from . import util
8+
from .conftest import unsupported_before
89

910

1011
def test_gpu_get_module_id(nvml_init):
@@ -23,23 +24,14 @@ def test_gpu_get_module_id(nvml_init):
2324

2425

2526
def test_gpu_get_platform_info(all_devices):
26-
skip_reasons = set()
2727
for device in all_devices:
2828
if util.is_vgpu(device):
29-
skip_reasons.add(f"Not supported on vGPU device {device}")
30-
continue
29+
pytest.skip(f"Not supported on vGPU device {device}")
3130

32-
# TODO
33-
# if device.feature_dict.board.chip < board_class.Architecture.Blackwell:
34-
# test_utils.skip_test("Not supported on chip before Blackwell")
31+
# Documentation says Blackwell or newer only, but this does seem to pass
32+
# on some older GPUs.
3533

36-
try:
34+
with unsupported_before(device, None):
3735
platform_info = nvml.device_get_platform_info(device)
38-
except nvml.NotSupportedError:
39-
skip_reasons.add(f"Not supported returned, linkely NVLink is disable for {device}")
40-
continue
4136

4237
assert isinstance(platform_info, nvml.PlatformInfo_v2)
43-
44-
if skip_reasons:
45-
pytest.skip(" ; ".join(skip_reasons))

cuda_bindings/tests/nvml/test_init.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
33

44
import sys
5+
import warnings
56

67
import pytest
78
from cuda.bindings import _nvml as nvml
@@ -16,6 +17,23 @@ def assert_nvml_is_uninitialized():
1617
nvml.device_get_count_v2()
1718

1819

20+
def test_devices_are_the_same_architecture(all_devices):
    """Warn when the system mixes GPUs of different architectures."""
    # Tests built on `unsupported_before` typically skip the whole test as soon
    # as one device turns out to be unsupported. On a system with mixed
    # architectures that means later devices may never be exercised, so surface
    # that (hopefully rare) situation as a warning.
    distinct_arches = {nvml.DeviceArch(nvml.device_get_architecture(device)) for device in all_devices}

    if len(distinct_arches) <= 1:
        return

    arch_names = ", ".join(arch.name for arch in distinct_arches)
    warnings.warn(  # noqa: B028
        f"System has devices of multiple architectures ({arch_names}). "
        " Some tests may be skipped unexpectedly",
        UserWarning,
    )
35+
36+
1937
@pytest.mark.skipif(sys.platform == "win32", reason="Test not supported on Windows")
2038
def test_init_ref_count():
2139
"""

cuda_bindings/tests/nvml/test_pynvml.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from cuda.bindings import _nvml as nvml
1111

1212
from . import util
13+
from .conftest import unsupported_before
1314

1415
XFAIL_LEGACY_NVLINK_MSG = "Legacy NVLink test expected to fail."
1516

@@ -66,7 +67,8 @@ def test_device_get_handle_by_pci_bus_id(ngpus, pci_info):
6667
def test_device_get_memory_affinity(handles, scope):
6768
size = 1024
6869
for handle in handles:
69-
node_set = nvml.device_get_memory_affinity(handle, size, scope)
70+
with unsupported_before(handle, nvml.DeviceArch.KEPLER):
71+
node_set = nvml.device_get_memory_affinity(handle, size, scope)
7072
assert node_set is not None
7173
assert len(node_set) == size
7274

@@ -76,7 +78,8 @@ def test_device_get_memory_affinity(handles, scope):
7678
def test_device_get_cpu_affinity_within_scope(handles, scope):
7779
size = 1024
7880
for handle in handles:
79-
cpu_set = nvml.device_get_cpu_affinity_within_scope(handle, size, scope)
81+
with unsupported_before(handle, nvml.DeviceArch.KEPLER):
82+
cpu_set = nvml.device_get_cpu_affinity_within_scope(handle, size, scope)
8083
assert cpu_set is not None
8184
assert len(cpu_set) == size
8285

@@ -136,22 +139,22 @@ def test_device_get_p2p_status(handles, index):
136139

137140
def test_device_get_power_usage(ngpus, handles):
138141
for i in range(ngpus):
139-
try:
142+
# Note: documentation says this is supported on Fermi or newer,
143+
# but in practice it fails on some later architectures.
144+
with unsupported_before(handles[i], None):
140145
power_mwatts = nvml.device_get_power_usage(handles[i])
141-
except nvml.NotSupportedError:
142-
pytest.skip("device_get_power_usage not supported")
143146
assert power_mwatts >= 0.0
144147

145148

146149
def test_device_get_total_energy_consumption(ngpus, handles):
147150
for i in range(ngpus):
148-
try:
151+
with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
149152
energy_mjoules1 = nvml.device_get_total_energy_consumption(handles[i])
150-
except nvml.NotSupportedError:
151-
pytest.skip("device_get_total_energy_consumption not supported")
153+
152154
for j in range(10): # idle for 150 ms
153155
time.sleep(0.015) # and check for increase every 15 ms
154-
energy_mjoules2 = nvml.device_get_total_energy_consumption(handles[i])
156+
with unsupported_before(handles[i], nvml.DeviceArch.VOLTA):
157+
energy_mjoules2 = nvml.device_get_total_energy_consumption(handles[i])
155158
assert energy_mjoules2 >= energy_mjoules1
156159
if energy_mjoules2 > energy_mjoules1:
157160
break
@@ -182,7 +185,8 @@ def test_device_get_memory_info(ngpus, handles):
182185

183186
def test_device_get_utilization_rates(ngpus, handles):
184187
for i in range(ngpus):
185-
urate = nvml.device_get_utilization_rates(handles[i])
188+
with unsupported_before(handles[i], "FERMI"):
189+
urate = nvml.device_get_utilization_rates(handles[i])
186190
assert urate.gpu >= 0
187191
assert urate.memory >= 0
188192

@@ -239,7 +243,8 @@ def test_device_get_utilization_rates(ngpus, handles):
239243

240244
def test_device_get_pcie_throughput(ngpus, handles):
241245
for i in range(ngpus):
242-
tx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_TX_BYTES)
246+
with unsupported_before(handles[i], nvml.DeviceArch.MAXWELL):
247+
tx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_TX_BYTES)
243248
assert tx_bytes_tp >= 0
244249
rx_bytes_tp = nvml.device_get_pcie_throughput(handles[i], nvml.PcieUtilCounter.PCIE_UTIL_RX_BYTES)
245250
assert rx_bytes_tp >= 0
@@ -271,10 +276,10 @@ def test_device_get_pcie_throughput(ngpus, handles):
271276
def test_device_get_nvlink_capability(ngpus, handles, cap_type):
272277
for i in range(ngpus):
273278
for j in range(nvml.NVLINK_MAX_LINKS):
274-
try:
279+
# By the documentation, this should be supported on PASCAL or newer,
280+
# but this also seems to fail on some newer architectures.
281+
with unsupported_before(handles[i], None):
275282
cap = nvml.device_get_nvlink_capability(handles[i], j, cap_type)
276-
except nvml.NotSupportedError:
277-
pytest.skip("NVLink capability not supported")
278283
assert cap >= 0
279284

280285

cuda_core/cuda/core/_device.pyx

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,29 @@ class Device:
10341034
total = system.get_num_devices()
10351035
return tuple(cls(device_id) for device_id in range(total))
10361036

1037+
def to_system_device(self) -> 'cuda.core.system.Device':
    """
    Return the :class:`cuda.core.system.Device` that matches this device.

    A :class:`cuda.core.Device` is used for CUDA access, whereas a
    :class:`cuda.core.system.Device` is used for NVIDIA Machine Library
    (NVML) access. The two are matched by UUID.

    Returns
    -------
    cuda.core.system.Device
        The system-level device instance, used for NVML access, that has
        the same UUID as this device.

    Raises
    ------
    RuntimeError
        If the installed ``cuda_bindings`` is not compatible with the NVML
        support required by :class:`cuda.core.system.Device`.
    """
    from cuda.core.system._system import CUDA_BINDINGS_NVML_IS_COMPATIBLE as nvml_is_compatible

    if nvml_is_compatible:
        # Imported only after the compatibility check has passed.
        from cuda.core.system import Device as SystemDevice
        return SystemDevice(uuid=self.uuid)

    raise RuntimeError(
        "cuda.core.system.Device requires cuda_bindings 13.1.2+ or 12.9.6+"
    )
1059+
10371060
@property
10381061
def device_id(self) -> int:
10391062
"""Return device ordinal."""

0 commit comments

Comments
 (0)