Skip to content

Commit 533f158

Browse files
committed
Merge remote-tracking branch 'upstream/main' into cuda.core.system-events
2 parents 187274a + 67b18e2 commit 533f158

8 files changed

Lines changed: 279 additions & 19 deletions

File tree

cuda_bindings/cuda/bindings/_lib/utils.pxd.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ cdef class _HelperCUpointer_attribute:
5050
# Return values
5151
cdef driver.CUcontext _ctx
5252
cdef unsigned int _uint
53+
cdef int _int
5354
cdef driver.CUdeviceptr _devptr
5455
cdef void** _void
5556
cdef driver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS _token

cuda_bindings/cuda/bindings/_lib/utils.pxi.in

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,12 +247,14 @@ cdef class _HelperCUpointer_attribute:
247247
else:
248248
self._cptr = <void*><void_ptr>init_value.getPtr()
249249
elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_MEMORY_TYPE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MEMORY_TYPE,{{endif}}
250-
{{if 'CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,{{endif}}
251250
{{if 'CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES,{{endif}}
252251
{{if 'CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE,{{endif}}
253252
{{if 'CU_POINTER_ATTRIBUTE_ACCESS_FLAGS'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_ACCESS_FLAGS,{{endif}}):
254253
self._uint = init_value
255254
self._cptr = <void*>&self._uint
255+
elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,{{endif}}):
256+
self._int = init_value
257+
self._cptr = <void*>&self._int
256258
elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_DEVICE_POINTER'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_POINTER,{{endif}}
257259
{{if 'CU_POINTER_ATTRIBUTE_RANGE_START_ADDR'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_RANGE_START_ADDR,{{endif}}):
258260
if self._is_getter:
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
.. SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
.. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
3+
4+
.. module:: cuda.bindings
5+
6+
``cuda-bindings`` 13.1.X Release notes
7+
======================================
8+
9+
Highlights
10+
----------
11+
12+
Experimental
13+
------------
14+
15+
Bugfixes
16+
--------
17+
18+
* Fixed an issue where the ``CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL`` attribute was
19+
retrieved as an unsigned int, rather than a signed int.
20+
(`PR #1336 <https://github.com/NVIDIA/cuda-python/pull/1336>`_)
21+
22+
Known issues
23+
------------
24+
25+
* Updating from older versions (v12.6.2.post1 and below) via ``pip install -U cuda-python`` might not work. Please do a clean re-installation by running ``pip uninstall -y cuda-python`` followed by ``pip install cuda-python``.
26+
* The graphics APIs in ``cuda.bindings.runtime`` are inadvertently disabled in 13.0.2. Users needing these APIs should update to 13.0.3.

cuda_bindings/tests/test_cuda.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,22 @@ def test_cuda_pointer_attr():
370370
assert err == cuda.CUresult.CUDA_SUCCESS
371371

372372

373+
@pytest.mark.skipif(
    driverVersionLessThan(11030) or not supportsManagedMemory(), reason="When new attributes were introduced"
)
def test_pointer_get_attributes_device_ordinal():
    """Query the device-ordinal pointer attribute and check its magnitude is small."""
    requested = [cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL]
    result = cuda.cuPointerGetAttributes(len(requested), requested, 0)

    # Only one attribute was requested, so its value sits at position 0 of
    # the second element of the returned tuple.
    ordinal = result[1][0]

    # Device ordinals are always small numbers; a large magnitude here would
    # indicate an overflow error.
    assert abs(ordinal) < 256
387+
388+
373389
@pytest.mark.skipif(not supportsManagedMemory(), reason="When new attributes were introduced")
374390
def test_cuda_mem_range_attr(device):
375391
size = 0x1000

cuda_core/cuda/core/system/_device.pyx

Lines changed: 155 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ from ._nvml_context cimport initialize
1515
include "_device_utils.pxi"
1616

1717

18+
BrandType = nvml.BrandType
1819
EventType = nvml.EventType
1920
FieldId = nvml.FieldId
2021

@@ -307,6 +308,77 @@ cdef class DeviceEvents:
307308
return EventData(nvml.event_set_wait_v2(self._event_set, timeout_ms))
308309

309310

311+
cdef class DeviceAttributes:
    """
    Various device attributes.

    Read-only wrapper around an ``nvml.DeviceAttributes`` object: every
    property forwards to the same-named field of the wrapped object.

    Parameters
    ----------
    attributes: nvml.DeviceAttributes
        The attributes object to wrap.
    """

    # Extension types have no per-instance ``__dict__``, so the wrapped object
    # needs a declared attribute slot before ``__init__`` can assign to it.
    cdef object _attributes

    def __init__(self, attributes: nvml.DeviceAttributes):
        self._attributes = attributes

    @property
    def multiprocessor_count(self) -> int:
        """
        The streaming multiprocessor count
        """
        return self._attributes.multiprocessor_count

    @property
    def shared_copy_engine_count(self) -> int:
        """
        The shared copy engine count
        """
        return self._attributes.shared_copy_engine_count

    @property
    def shared_decoder_count(self) -> int:
        """
        The shared decoder engine count
        """
        return self._attributes.shared_decoder_count

    @property
    def shared_encoder_count(self) -> int:
        """
        The shared encoder engine count
        """
        return self._attributes.shared_encoder_count

    @property
    def shared_jpeg_count(self) -> int:
        """
        The shared JPEG engine count
        """
        return self._attributes.shared_jpeg_count

    @property
    def shared_ofa_count(self) -> int:
        """
        The shared optical flow accelerator (OFA) engine count
        """
        return self._attributes.shared_ofa_count

    @property
    def gpu_instance_slice_count(self) -> int:
        """
        The GPU instance slice count
        """
        return self._attributes.gpu_instance_slice_count

    @property
    def compute_instance_slice_count(self) -> int:
        """
        The compute instance slice count
        """
        return self._attributes.compute_instance_slice_count

    @property
    def memory_size_mb(self) -> int:
        """
        Device memory size in MiB
        """
        return self._attributes.memory_size_mb
381+
310382
cdef class FieldValue:
311383
"""
312384
Represents the data from a single field value.
@@ -450,45 +522,64 @@ cdef class Device:
450522
about devices and their topology, as provided by the NVIDIA Management
451523
Library (NVML). To use CUDA with a device, use :class:`cuda.core.Device`.
452524

525+
Creating a device instance causes NVML to initialize the target GPU.
526+
NVML may initialize additional GPUs if the target GPU is an SLI slave.
527+
453528
Parameters
454529
----------
455530
index: int, optional
456-
Integer representing the CUDA device index to get a handle to.
531+
Integer representing the CUDA device index to get a handle to. Valid
532+
values are between ``0`` and ``cuda.core.system.get_num_devices() - 1``.
533+
534+
The order in which devices are enumerated has no guarantees of
535+
consistency between reboots. For that reason, it is recommended that
536+
devices are looked up by their PCI ids or UUID.
537+
457538
uuid: bytes or str, optional
458539
UUID of a CUDA device to get a handle to.
459540

541+
pci_bus_id: bytes or str, optional
542+
PCI bus ID of a CUDA device to get a handle to.
543+
460544
Raises
461545
------
462546
ValueError
463-
If neither `index` nor `uuid` are specified or if both are specified.
547+
If anything other than exactly one of `index`, `uuid` or `pci_bus_id` is specified.
464548
"""
465549

466550
cdef intptr_t _handle
467551

468-
def __init__(self, index: int | None = None, uuid: bytes | str | None = None, handle: int | None = None):
552+
def __init__(
553+
self,
554+
*,
555+
index: int | None = None,
556+
uuid: bytes | str | None = None,
557+
pci_bus_id: bytes | str | None = None,
558+
handle: int | None = None
559+
):
469560
initialize()
470561

471-
args = [index, uuid, handle]
562+
args = [index, uuid, pci_bus_id, handle]
472563
cdef int arg_count = sum(arg is not None for arg in args)
473564

474565
if arg_count > 1:
475-
raise ValueError("Handle requires only one of `index`, `uuid` or `handle`.")
566+
raise ValueError("Handle requires only one of `index`, `uuid`, `pci_bus_id` or `handle`.")
476567
if arg_count == 0:
477-
raise ValueError("Handle requires either a device `index` or `uuid`.")
568+
raise ValueError("Handle requires either a device `index`, `pci_bus_id`, or `uuid`.")
478569

479570
if index is not None:
480571
self._handle = nvml.device_get_handle_by_index_v2(index)
481572
elif uuid is not None:
482573
if isinstance(uuid, bytes):
483574
uuid = uuid.decode("ascii")
484575
self._handle = nvml.device_get_handle_by_uuid(uuid)
576+
elif pci_bus_id is not None:
577+
if isinstance(pci_bus_id, bytes):
578+
pci_bus_id = pci_bus_id.decode("ascii")
579+
self._handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
485580
elif handle is not None:
486581
self._handle = handle
487582

488-
@property
489-
def handle(self) -> int:
490-
return self._handle
491-
492583
@classmethod
493584
def get_all_devices(cls) -> Iterable[Device]:
494585
"""
@@ -501,7 +592,7 @@ cdef class Device:
501592
"""
502593
total = nvml.device_get_count_v2()
503594
for device_id in range(total):
504-
yield cls(device_id)
595+
yield cls(index=device_id)
505596

506597
@property
507598
def architecture(self) -> DeviceArchitecture:
@@ -565,6 +656,24 @@ cdef class Device:
565656
"""
566657
return nvml.device_get_name(self._handle)
567658

659+
@property
def brand(self) -> BrandType:
    """
    The brand of this device, as a :class:`BrandType` value.
    """
    raw_brand = nvml.device_get_brand(self._handle)
    return BrandType(raw_brand)
665+
666+
@property
def index(self) -> int:
    """
    NVML index of this device.

    NVML does not guarantee that devices are enumerated in the same order
    between reboots, so it is recommended to look devices up by their PCI
    ids or GPU UUID instead.
    """
    nvml_index = nvml.device_get_index(self._handle)
    return nvml_index
676+
568677
@property
569678
def pci_info(self) -> PciInfo:
570679
"""
@@ -647,6 +756,39 @@ cdef class Device:
647756
bitmask[0] = nvml.device_get_supported_event_types(self._handle)
648757
return [EventType(1 << ev) for ev in _unpack_bitmask(bitmask)]
649758
759+
@property
def attributes(self) -> DeviceAttributes:
    """
    Various attributes of this device, wrapped in a :class:`DeviceAttributes`.

    For Ampere™ or newer fully supported devices. Only available on Linux
    systems.
    """
    raw_attributes = nvml.device_get_attributes_v2(self._handle)
    return DeviceAttributes(raw_attributes)
768+
769+
@property
def is_c2c_mode_enabled(self) -> bool:
    """
    ``True`` when C2C (Chip-to-Chip) mode is enabled for this device.
    """
    c2c_info = nvml.device_get_c2c_mode_info_v(self._handle)
    return bool(c2c_info.is_c2c_enabled)
775+
776+
@property
def persistence_mode_enabled(self) -> bool:
    """
    ``True`` when persistence mode is enabled for this device.

    Assigning a truthy or falsy value to this property enables or disables
    persistence mode.

    For Linux only.
    """
    current_mode = nvml.device_get_persistence_mode(self._handle)
    return current_mode == nvml.EnableState.FEATURE_ENABLED

@persistence_mode_enabled.setter
def persistence_mode_enabled(self, enabled: bool) -> None:
    # Translate the truthiness of ``enabled`` into the NVML enable state.
    if enabled:
        new_state = nvml.EnableState.FEATURE_ENABLED
    else:
        new_state = nvml.EnableState.FEATURE_DISABLED
    nvml.device_set_persistence_mode(self._handle, new_state)
791+
650792
def get_field_values(self, field_ids: list[int | tuple[int, int]]) -> FieldValues:
651793
"""
652794
Get multiple field values from the device.
@@ -690,8 +832,10 @@ cdef class Device:
690832
691833
__all__ = [
692834
"BAR1MemoryInfo",
835+
"BrandType",
693836
"Device",
694837
"DeviceArchitecture",
838+
"DeviceAttributes",
695839
"DeviceEvents",
696840
"EventData",
697841
"EventType",

cuda_core/docs/source/api.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,19 @@ CUDA system information and NVIDIA Management Library (NVML)
8989
:template: autosummary/cyclass.rst
9090

9191
system.Device
92+
system.BAR1MemoryInfo
93+
system.BrandType
9294
system.DeviceArchitecture
95+
system.DeviceAttributes
9396
system.DeviceEvents
9497
system.EventData
9598
system.EventType
9699
system.FieldId
97100
system.FieldValue
98101
system.FieldValues
99102
system.MemoryInfo
100-
system.BAR1MemoryInfo
101103
system.PciInfo
102104

103-
104105
.. module:: cuda.core.utils
105106

106107
Utility functions

0 commit comments

Comments
 (0)