Skip to content

Commit 6686b20

Browse files
committed
Fix build warnings and optimize handle passing
- Fix Cython `nogil` placement: move `nogil` after `except+` (44 warnings).
- Fix unreachable code in `ManagedMemoryResource`: move `super().__init__` inside the CUDA 13+ conditional block.
- Optimize C++ functions to accept handles by `const&` instead of by value, avoiding unnecessary atomic reference-count operations on `shared_ptr`.
1 parent 27e9066 commit 6686b20

5 files changed

Lines changed: 63 additions & 63 deletions

File tree

cuda_core/cuda/core/_cpp/resource_handles.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ struct StreamBox {
234234
};
235235
} // namespace
236236

237-
StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int priority) {
237+
StreamHandle create_stream_handle(const ContextHandle& h_ctx, unsigned int flags, int priority) {
238238
GILReleaseGuard gil;
239239
CUstream stream;
240240
if (CUDA_SUCCESS != (err = p_cuStreamCreateWithPriority(&stream, flags, priority))) {
@@ -301,7 +301,7 @@ struct EventBox {
301301
};
302302
} // namespace
303303

304-
EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) {
304+
EventHandle create_event_handle(const ContextHandle& h_ctx, unsigned int flags) {
305305
GILReleaseGuard gil;
306306
CUevent event;
307307
if (CUDA_SUCCESS != (err = p_cuEventCreate(&event, flags))) {
@@ -449,11 +449,11 @@ StreamHandle deallocation_stream(const DevicePtrHandle& h) noexcept {
449449
return get_box(h)->h_stream;
450450
}
451451

452-
void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) noexcept {
453-
get_box(h)->h_stream = std::move(h_stream);
452+
void set_deallocation_stream(const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept {
453+
get_box(h)->h_stream = h_stream;
454454
}
455455

456-
DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) {
456+
DevicePtrHandle deviceptr_alloc_from_pool(size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) {
457457
GILReleaseGuard gil;
458458
CUdeviceptr ptr;
459459
if (CUDA_SUCCESS != (err = p_cuMemAllocFromPoolAsync(&ptr, size, *h_pool, as_cu(h_stream)))) {
@@ -471,7 +471,7 @@ DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool,
471471
return DevicePtrHandle(box, &box->resource);
472472
}
473473

474-
DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) {
474+
DevicePtrHandle deviceptr_alloc_async(size_t size, const StreamHandle& h_stream) {
475475
GILReleaseGuard gil;
476476
CUdeviceptr ptr;
477477
if (CUDA_SUCCESS != (err = p_cuMemAllocAsync(&ptr, size, as_cu(h_stream)))) {
@@ -612,7 +612,7 @@ struct ExportDataKeyHash {
612612
static std::mutex ipc_ptr_cache_mutex;
613613
static std::unordered_map<ExportDataKey, std::weak_ptr<DevicePtrBox>, ExportDataKeyHash> ipc_ptr_cache;
614614

615-
DevicePtrHandle deviceptr_import_ipc(MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) {
615+
DevicePtrHandle deviceptr_import_ipc(const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) {
616616
auto data = const_cast<CUmemPoolPtrExportData*>(
617617
reinterpret_cast<const CUmemPoolPtrExportData*>(export_data));
618618

cuda_core/cuda/core/_cpp/resource_handles.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ ContextHandle get_current_context();
9393
// The stream structurally depends on the provided context handle.
9494
// When the last reference is released, cuStreamDestroy is called automatically.
9595
// Returns empty handle on error (caller must check).
96-
StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int priority);
96+
StreamHandle create_stream_handle(const ContextHandle& h_ctx, unsigned int flags, int priority);
9797

9898
// Create a non-owning stream handle (references existing stream).
9999
// Use for borrowed streams (from foreign code) or built-in streams.
@@ -122,7 +122,7 @@ StreamHandle get_per_thread_stream();
122122
// The event structurally depends on the provided context handle.
123123
// When the last reference is released, cuEventDestroy is called automatically.
124124
// Returns empty handle on error (caller must check).
125-
EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags);
125+
EventHandle create_event_handle(const ContextHandle& h_ctx, unsigned int flags);
126126

127127
// Create an owning event handle without context dependency.
128128
// Use for temporary events that are created and destroyed in the same scope.
@@ -173,13 +173,13 @@ using DevicePtrHandle = std::shared_ptr<const CUdeviceptr>;
173173
// Returns empty handle on error (caller must check).
174174
DevicePtrHandle deviceptr_alloc_from_pool(
175175
size_t size,
176-
MemoryPoolHandle h_pool,
177-
StreamHandle h_stream);
176+
const MemoryPoolHandle& h_pool,
177+
const StreamHandle& h_stream);
178178

179179
// Allocate device memory asynchronously via cuMemAllocAsync.
180180
// When the last reference is released, cuMemFreeAsync is called on the stored stream.
181181
// Returns empty handle on error (caller must check).
182-
DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream);
182+
DevicePtrHandle deviceptr_alloc_async(size_t size, const StreamHandle& h_stream);
183183

184184
// Allocate device memory synchronously via cuMemAlloc.
185185
// When the last reference is released, cuMemFree is called.
@@ -207,16 +207,16 @@ DevicePtrHandle deviceptr_create_with_owner(CUdeviceptr ptr, PyObject* owner);
207207
// Note: Does not yet implement reference counting for nvbug 5570902.
208208
// On error, returns empty handle and sets thread-local error (use get_last_error()).
209209
DevicePtrHandle deviceptr_import_ipc(
210-
MemoryPoolHandle h_pool,
210+
const MemoryPoolHandle& h_pool,
211211
const void* export_data,
212-
StreamHandle h_stream);
212+
const StreamHandle& h_stream);
213213

214214
// Access the deallocation stream for a device pointer handle (read-only).
215215
// For non-owning handles, the stream is not used but can still be accessed.
216216
StreamHandle deallocation_stream(const DevicePtrHandle& h) noexcept;
217217

218218
// Set the deallocation stream for a device pointer handle.
219-
void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) noexcept;
219+
void set_deallocation_stream(const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept;
220220

221221
// ============================================================================
222222
// Overloaded helper functions to extract raw resources from handles

cuda_core/cuda/core/_memory/_managed_memory_resource.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,11 @@ cdef class ManagedMemoryResource(_MemPool):
9090
opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
9191

9292
opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED
93+
94+
super().__init__(device_id, opts_base)
9395
ELSE:
9496
raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")
9597

96-
super().__init__(device_id, opts_base)
97-
9898
@property
9999
def is_device_accessible(self) -> bool:
100100
"""Return True. This memory resource provides device-accessible buffers."""

cuda_core/cuda/core/_resource_handles.pxd

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -56,41 +56,41 @@ cdef cydriver.CUresult peek_last_error() noexcept nogil
5656
cdef void clear_last_error() noexcept nogil
5757

5858
# Context handles
59-
cdef ContextHandle create_context_handle_ref(cydriver.CUcontext ctx) nogil except+
60-
cdef ContextHandle get_primary_context(int device_id) nogil except+
61-
cdef ContextHandle get_current_context() nogil except+
59+
cdef ContextHandle create_context_handle_ref(cydriver.CUcontext ctx) except+ nogil
60+
cdef ContextHandle get_primary_context(int device_id) except+ nogil
61+
cdef ContextHandle get_current_context() except+ nogil
6262

6363
# Stream handles
6464
cdef StreamHandle create_stream_handle(
65-
ContextHandle h_ctx, unsigned int flags, int priority) nogil except+
66-
cdef StreamHandle create_stream_handle_ref(cydriver.CUstream stream) nogil except+
67-
cdef StreamHandle create_stream_handle_with_owner(cydriver.CUstream stream, object owner) nogil except+
68-
cdef StreamHandle get_legacy_stream() nogil except+
69-
cdef StreamHandle get_per_thread_stream() nogil except+
65+
const ContextHandle& h_ctx, unsigned int flags, int priority) except+ nogil
66+
cdef StreamHandle create_stream_handle_ref(cydriver.CUstream stream) except+ nogil
67+
cdef StreamHandle create_stream_handle_with_owner(cydriver.CUstream stream, object owner) except+ nogil
68+
cdef StreamHandle get_legacy_stream() except+ nogil
69+
cdef StreamHandle get_per_thread_stream() except+ nogil
7070

7171
# Event handles
72-
cdef EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) nogil except+
73-
cdef EventHandle create_event_handle_noctx(unsigned int flags) nogil except+
72+
cdef EventHandle create_event_handle(const ContextHandle& h_ctx, unsigned int flags) except+ nogil
73+
cdef EventHandle create_event_handle_noctx(unsigned int flags) except+ nogil
7474
cdef EventHandle create_event_handle_ipc(
75-
const cydriver.CUipcEventHandle& ipc_handle) nogil except+
75+
const cydriver.CUipcEventHandle& ipc_handle) except+ nogil
7676

7777
# Memory pool handles
7878
cdef MemoryPoolHandle create_mempool_handle(
79-
const cydriver.CUmemPoolProps& props) nogil except+
80-
cdef MemoryPoolHandle create_mempool_handle_ref(cydriver.CUmemoryPool pool) nogil except+
81-
cdef MemoryPoolHandle get_device_mempool(int device_id) nogil except+
79+
const cydriver.CUmemPoolProps& props) except+ nogil
80+
cdef MemoryPoolHandle create_mempool_handle_ref(cydriver.CUmemoryPool pool) except+ nogil
81+
cdef MemoryPoolHandle get_device_mempool(int device_id) except+ nogil
8282
cdef MemoryPoolHandle create_mempool_handle_ipc(
83-
int fd, cydriver.CUmemAllocationHandleType handle_type) nogil except+
83+
int fd, cydriver.CUmemAllocationHandleType handle_type) except+ nogil
8484

8585
# Device pointer handles
8686
cdef DevicePtrHandle deviceptr_alloc_from_pool(
87-
size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) nogil except+
88-
cdef DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) nogil except+
89-
cdef DevicePtrHandle deviceptr_alloc(size_t size) nogil except+
90-
cdef DevicePtrHandle deviceptr_alloc_host(size_t size) nogil except+
91-
cdef DevicePtrHandle deviceptr_create_ref(cydriver.CUdeviceptr ptr) nogil except+
92-
cdef DevicePtrHandle deviceptr_create_with_owner(cydriver.CUdeviceptr ptr, object owner) nogil except+
87+
size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) except+ nogil
88+
cdef DevicePtrHandle deviceptr_alloc_async(size_t size, const StreamHandle& h_stream) except+ nogil
89+
cdef DevicePtrHandle deviceptr_alloc(size_t size) except+ nogil
90+
cdef DevicePtrHandle deviceptr_alloc_host(size_t size) except+ nogil
91+
cdef DevicePtrHandle deviceptr_create_ref(cydriver.CUdeviceptr ptr) except+ nogil
92+
cdef DevicePtrHandle deviceptr_create_with_owner(cydriver.CUdeviceptr ptr, object owner) except+ nogil
9393
cdef DevicePtrHandle deviceptr_import_ipc(
94-
MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) nogil except+
94+
const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) except+ nogil
9595
cdef StreamHandle deallocation_stream(const DevicePtrHandle& h) noexcept nogil
96-
cdef void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) noexcept nogil
96+
cdef void set_deallocation_stream(const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept nogil

cuda_core/cuda/core/_resource_handles.pyx

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -40,56 +40,56 @@ cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
4040

4141
# Context handles
4242
ContextHandle create_context_handle_ref "cuda_core::create_context_handle_ref" (
43-
cydriver.CUcontext ctx) nogil except+
43+
cydriver.CUcontext ctx) except+ nogil
4444
ContextHandle get_primary_context "cuda_core::get_primary_context" (
45-
int device_id) nogil except+
46-
ContextHandle get_current_context "cuda_core::get_current_context" () nogil except+
45+
int device_id) except+ nogil
46+
ContextHandle get_current_context "cuda_core::get_current_context" () except+ nogil
4747

4848
# Stream handles
4949
StreamHandle create_stream_handle "cuda_core::create_stream_handle" (
50-
ContextHandle h_ctx, unsigned int flags, int priority) nogil except+
50+
const ContextHandle& h_ctx, unsigned int flags, int priority) except+ nogil
5151
StreamHandle create_stream_handle_ref "cuda_core::create_stream_handle_ref" (
52-
cydriver.CUstream stream) nogil except+
52+
cydriver.CUstream stream) except+ nogil
5353
StreamHandle create_stream_handle_with_owner "cuda_core::create_stream_handle_with_owner" (
54-
cydriver.CUstream stream, object owner) nogil except+
55-
StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () nogil except+
56-
StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () nogil except+
54+
cydriver.CUstream stream, object owner) except+ nogil
55+
StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () except+ nogil
56+
StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () except+ nogil
5757

5858
# Event handles (note: _create_event_handle* are internal due to C++ overloading)
5959
EventHandle create_event_handle "cuda_core::create_event_handle" (
60-
ContextHandle h_ctx, unsigned int flags) nogil except+
60+
const ContextHandle& h_ctx, unsigned int flags) except+ nogil
6161
EventHandle create_event_handle_noctx "cuda_core::create_event_handle_noctx" (
62-
unsigned int flags) nogil except+
62+
unsigned int flags) except+ nogil
6363
EventHandle create_event_handle_ipc "cuda_core::create_event_handle_ipc" (
64-
const cydriver.CUipcEventHandle& ipc_handle) nogil except+
64+
const cydriver.CUipcEventHandle& ipc_handle) except+ nogil
6565

6666
# Memory pool handles
6767
MemoryPoolHandle create_mempool_handle "cuda_core::create_mempool_handle" (
68-
const cydriver.CUmemPoolProps& props) nogil except+
68+
const cydriver.CUmemPoolProps& props) except+ nogil
6969
MemoryPoolHandle create_mempool_handle_ref "cuda_core::create_mempool_handle_ref" (
70-
cydriver.CUmemoryPool pool) nogil except+
70+
cydriver.CUmemoryPool pool) except+ nogil
7171
MemoryPoolHandle get_device_mempool "cuda_core::get_device_mempool" (
72-
int device_id) nogil except+
72+
int device_id) except+ nogil
7373
MemoryPoolHandle create_mempool_handle_ipc "cuda_core::create_mempool_handle_ipc" (
74-
int fd, cydriver.CUmemAllocationHandleType handle_type) nogil except+
74+
int fd, cydriver.CUmemAllocationHandleType handle_type) except+ nogil
7575

7676
# Device pointer handles
7777
DevicePtrHandle deviceptr_alloc_from_pool "cuda_core::deviceptr_alloc_from_pool" (
78-
size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) nogil except+
78+
size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) except+ nogil
7979
DevicePtrHandle deviceptr_alloc_async "cuda_core::deviceptr_alloc_async" (
80-
size_t size, StreamHandle h_stream) nogil except+
81-
DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) nogil except+
82-
DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) nogil except+
80+
size_t size, const StreamHandle& h_stream) except+ nogil
81+
DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) except+ nogil
82+
DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) except+ nogil
8383
DevicePtrHandle deviceptr_create_ref "cuda_core::deviceptr_create_ref" (
84-
cydriver.CUdeviceptr ptr) nogil except+
84+
cydriver.CUdeviceptr ptr) except+ nogil
8585
DevicePtrHandle deviceptr_create_with_owner "cuda_core::deviceptr_create_with_owner" (
86-
cydriver.CUdeviceptr ptr, object owner) nogil except+
86+
cydriver.CUdeviceptr ptr, object owner) except+ nogil
8787
DevicePtrHandle deviceptr_import_ipc "cuda_core::deviceptr_import_ipc" (
88-
MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) nogil except+
88+
const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) except+ nogil
8989
StreamHandle deallocation_stream "cuda_core::deallocation_stream" (
9090
const DevicePtrHandle& h) noexcept nogil
9191
void set_deallocation_stream "cuda_core::set_deallocation_stream" (
92-
const DevicePtrHandle& h, StreamHandle h_stream) noexcept nogil
92+
const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept nogil
9393

9494

9595
# =============================================================================

0 commit comments

Comments
 (0)