Skip to content

Commit ebf19fc

Browse files
committed
Merge remote-tracking branch 'upstream/main' into pyi-experiment
2 parents 1aeb0f9 + 342b319 commit ebf19fc

29 files changed

Lines changed: 434 additions & 57 deletions

.github/workflows/release.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,10 @@ jobs:
101101
with:
102102
python-version: "3.12"
103103

104-
- name: Self-test release-notes checker
105-
run: |
106-
pip install pytest
107-
pytest ci/tools/tests
104+
# - name: Self-test release-notes checker
105+
# run: |
106+
# pip install pytest
107+
# pytest ci/tools/tests
108108

109109
- name: Check versioned release notes exist
110110
run: |

.github/workflows/test-wheel-linux.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,21 +130,21 @@ jobs:
130130
path: ./cuda_pathfinder
131131

132132
- name: Download cuda-python build artifacts
133-
if: ${{ env.USE_BACKPORT_BINDINGS == '0' }}
133+
if: ${{ env.BINDINGS_SOURCE == 'main' }}
134134
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
135135
with:
136136
name: cuda-python-wheel
137137
path: .
138138

139139
- name: Download cuda.bindings build artifacts
140-
if: ${{ env.USE_BACKPORT_BINDINGS == '0' }}
140+
if: ${{ env.BINDINGS_SOURCE == 'main' }}
141141
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
142142
with:
143143
name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
144144
path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
145145

146146
- name: Download cuda-python & cuda.bindings build artifacts from the prior branch
147-
if: ${{ env.USE_BACKPORT_BINDINGS == '1' }}
147+
if: ${{ env.BINDINGS_SOURCE == 'backport' }}
148148
env:
149149
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
150150
run: |
@@ -184,6 +184,7 @@ jobs:
184184
ls -lahR .
185185
186186
- name: Display structure of downloaded cuda.bindings artifacts
187+
if: ${{ env.BINDINGS_SOURCE != 'published' }}
187188
run: |
188189
pwd
189190
ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
@@ -285,6 +286,7 @@ jobs:
285286
run: run-tests core
286287

287288
- name: Ensure cuda-python installable
289+
if: ${{ env.BINDINGS_SOURCE == 'main' }}
288290
run: |
289291
if [[ "${{ matrix.LOCAL_CTK }}" == 1 ]]; then
290292
pip install --only-binary=:all: cuda_python*.whl

.github/workflows/test-wheel-windows.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,21 +125,21 @@ jobs:
125125
path: ./cuda_pathfinder
126126

127127
- name: Download cuda-python build artifacts
128-
if: ${{ env.USE_BACKPORT_BINDINGS == '0' }}
128+
if: ${{ env.BINDINGS_SOURCE == 'main' }}
129129
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
130130
with:
131131
name: cuda-python-wheel
132132
path: .
133133

134134
- name: Download cuda.bindings build artifacts
135-
if: ${{ env.USE_BACKPORT_BINDINGS == '0' }}
135+
if: ${{ env.BINDINGS_SOURCE == 'main' }}
136136
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
137137
with:
138138
name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
139139
path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
140140

141141
- name: Download cuda-python & cuda.bindings build artifacts from the prior branch
142-
if: ${{ env.USE_BACKPORT_BINDINGS == '1' }}
142+
if: ${{ env.BINDINGS_SOURCE == 'backport' }}
143143
env:
144144
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
145145
run: |
@@ -170,6 +170,7 @@ jobs:
170170
Get-ChildItem -Recurse -Force | Select-Object Mode, LastWriteTime, Length, FullName
171171
172172
- name: Display structure of downloaded cuda.bindings artifacts
173+
if: ${{ env.BINDINGS_SOURCE != 'published' }}
173174
run: |
174175
Get-Location
175176
Get-ChildItem -Recurse -Force $env:CUDA_BINDINGS_ARTIFACTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName
@@ -261,6 +262,7 @@ jobs:
261262
run: run-tests core
262263

263264
- name: Ensure cuda-python installable
265+
if: ${{ env.BINDINGS_SOURCE == 'main' }}
264266
run: |
265267
if ('${{ matrix.LOCAL_CTK }}' -eq '1') {
266268
pip install --only-binary=:all: (Get-ChildItem -Filter cuda_python*.whl).FullName

ci/tools/env-vars

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -52,34 +52,38 @@ elif [[ "${1}" == "test" ]]; then
5252
BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${BUILD_CUDA_VER})"
5353
TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${CUDA_VER})"
5454
CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${BUILD_CUDA_VER}-${HOST_PLATFORM}"
55-
# USE_BACKPORT_BINDINGS flags the CTK-major-mismatch case where the
56-
# current-run bindings wheel was built for a different CTK major than the
57-
# one under test, so we must pull the bindings wheel from the backport
58-
# branch instead. This is independent of whether bindings tests run.
59-
# SKIP_CUDA_BINDINGS_TEST is the test-time gate: it is set when the CTK
60-
# majors differ OR when the caller tells us to skip for path-filter
61-
# reasons via SKIP_BINDINGS_TEST_OVERRIDE.
55+
56+
# BINDINGS_SOURCE controls which cuda-bindings to install at test time:
57+
# main — use the just-built bindings wheel from this CI run
58+
# backport — fetch bindings from the prior (N-1) branch
59+
# published — install from PyPI (cuda-bindings==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.*)
60+
#
61+
# SKIP_CUDA_BINDINGS_TEST / SKIP_CYTHON_TEST control which *tests* to run
62+
# (they do NOT affect installation — that's BINDINGS_SOURCE's job).
63+
64+
BUILD_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${BUILD_CUDA_VER})"
65+
TEST_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${CUDA_VER})"
66+
6267
if [[ ${BUILD_CUDA_MAJOR} != ${TEST_CUDA_MAJOR} ]]; then
63-
USE_BACKPORT_BINDINGS=1
68+
# Major mismatch (e.g. build=13.x, test=12.x): use the backport branch.
69+
BINDINGS_SOURCE=backport
70+
SKIP_CUDA_BINDINGS_TEST=1
71+
SKIP_CYTHON_TEST=1
72+
elif [[ ${BUILD_CUDA_MINOR} != ${TEST_CUDA_MINOR} ]]; then
73+
# Same major, minor mismatch (e.g. build=13.2, test=13.0): use published
74+
# bindings from PyPI to test the real-world backward-compat scenario.
75+
BINDINGS_SOURCE=published
6476
SKIP_CUDA_BINDINGS_TEST=1
6577
SKIP_CYTHON_TEST=1
6678
else
67-
USE_BACKPORT_BINDINGS=0
68-
# Path-filter override only skips bindings tests, NOT cython tests
69-
# for other modules (e.g. cuda.core). Cython skip is driven solely
70-
# by the build/test CTK minor-version mismatch.
79+
# Exact match: use the just-built bindings wheel.
80+
BINDINGS_SOURCE=main
7181
if [[ "${SKIP_BINDINGS_TEST_OVERRIDE:-0}" == "1" ]]; then
7282
SKIP_CUDA_BINDINGS_TEST=1
7383
else
7484
SKIP_CUDA_BINDINGS_TEST=0
7585
fi
76-
BUILD_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${BUILD_CUDA_VER})"
77-
TEST_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${CUDA_VER})"
78-
if [[ ${BUILD_CUDA_MINOR} != ${TEST_CUDA_MINOR} ]]; then
79-
SKIP_CYTHON_TEST=1
80-
else
81-
SKIP_CYTHON_TEST=0
82-
fi
86+
SKIP_CYTHON_TEST=0
8387
fi
8488
# We don't test compute-sanitizer on CTK<12 because backporting fixes is too much effort
8589
# We only test compute-sanitizer on python 3.12 arbitrarily; we don't need to use sanitizer on the entire matrix
@@ -93,10 +97,11 @@ elif [[ "${1}" == "test" ]]; then
9397
fi
9498
{
9599
echo "SETUP_SANITIZER=${SETUP_SANITIZER}"
100+
echo "BINDINGS_SOURCE=${BINDINGS_SOURCE}"
96101
echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}"
97102
echo "SKIP_CYTHON_TEST=${SKIP_CYTHON_TEST}"
98103
echo "TEST_CUDA_MAJOR=${TEST_CUDA_MAJOR}"
99-
echo "USE_BACKPORT_BINDINGS=${USE_BACKPORT_BINDINGS}"
104+
echo "TEST_CUDA_MINOR=${TEST_CUDA_MINOR}"
100105
} >> $GITHUB_ENV
101106
fi
102107

ci/tools/run-tests

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,12 @@ elif [[ "${test_module}" == "bindings" ]]; then
5454
fi
5555
popd
5656
elif [[ "${test_module}" == "core" ]]; then
57-
# If build/test majors match: cuda.bindings is installed in the previous step.
58-
# If mismatch: cuda.bindings is installed from the backport branch.
59-
if [[ "${SKIP_CUDA_BINDINGS_TEST}" == 1 ]]; then
60-
echo "Installing bindings wheel"
57+
# Install cuda.bindings for core tests based on BINDINGS_SOURCE.
58+
if [[ "${BINDINGS_SOURCE}" == "published" ]]; then
59+
echo "Installing published cuda-bindings==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.* from PyPI"
60+
pip install "cuda-bindings==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.*"
61+
elif [[ "${BINDINGS_SOURCE}" == "backport" || "${BINDINGS_SOURCE}" == "main" ]]; then
62+
echo "Installing bindings wheel (source: ${BINDINGS_SOURCE})"
6163
if [[ "${LOCAL_CTK}" == 1 ]]; then
6264
pip install "${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl
6365
else
@@ -83,6 +85,8 @@ elif [[ "${test_module}" == "core" ]]; then
8385
# Constrain cuda-toolkit to the requested CTK version to avoid
8486
# pip pulling in a newer nvidia-cuda-runtime that conflicts with it.
8587
pip install "${WHL_EXTRA[@]}" --group "test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}" "cuda-toolkit==${CUDA_VER_MINOR}.*"
88+
echo "Installed packages before core tests:"
89+
pip list
8690
echo "Running core tests"
8791
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/
8892
# Currently our CI always installs the latest bindings (from either major version).

cuda_bindings/tests/nvml/conftest.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,23 @@ def nmigs(handles):
106106

107107
@pytest.fixture
108108
def mig_handles(nmigs):
109-
handles = [nvml.device_get_mig_device_handle_by_index(i) for i in range(nmigs)]
110-
assert len(handles) == nmigs
109+
handles = []
110+
with NVMLInitializer():
111+
dev_count = nvml.device_get_count_v2()
112+
113+
for dev_idx in range(dev_count):
114+
try:
115+
dev = nvml.device_get_handle_by_index_v2(dev_idx)
116+
except nvml.NoPermissionError:
117+
continue
118+
for mig_idx in range(nmigs):
119+
try:
120+
mig = nvml.device_get_mig_device_handle_by_index(dev, mig_idx)
121+
except nvml.NotFoundError:
122+
# Not all MIG devices may be available
123+
continue
124+
else:
125+
handles.append(mig)
111126
return handles
112127

113128

cuda_bindings/tests/nvml/test_cuda.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
import os
55

6+
import pytest
7+
68
import cuda.bindings.driver as cuda
79
from cuda.bindings import nvml
810

@@ -56,6 +58,10 @@ def test_cuda_device_order():
5658
cuda_devices = get_cuda_device_names()
5759
nvml_devices = get_nvml_device_names()
5860

61+
if any("Thor" in device["name"] for device in nvml_devices):
62+
pytest.skip("Skipping test on Thor, which has non-standard device naming")
63+
return
64+
5965
if "CUDA_VISIBLE_DEVICES" not in os.environ:
6066
# If that environment variable isn't set, the device lists should match exactly
6167
assert cuda_devices == nvml_devices, "CUDA and NVML device lists do not match"

cuda_bindings/tests/nvml/test_pynvml.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def test_device_get_memory_info(ngpus, handles):
187187

188188
def test_device_get_utilization_rates(ngpus, handles):
189189
for i in range(ngpus):
190-
with unsupported_before(handles[i], "FERMI"):
190+
with unsupported_before(handles[i], None):
191191
urate = nvml.device_get_utilization_rates(handles[i])
192192
assert urate.gpu >= 0
193193
assert urate.memory >= 0

cuda_core/cuda/core/__init__.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,46 @@ def _import_versioned_module():
2828
del _import_versioned_module
2929

3030

31+
def _patch_rlcompleter_for_cython_properties():
32+
# TODO: This can be removed when Python 3.13 is our minimum-supported version:
33+
# https://github.com/python/cpython/pull/149577
34+
35+
# Cython @property on cdef class compiles to a C-level getset_descriptor,
36+
# which rlcompleter's narrow isinstance(..., property) check misses; the
37+
# fallback getattr() then invokes the descriptor and any non-AttributeError
38+
# it raises kills tab completion. Extend that isinstance check to also
39+
# match getset_descriptor / member_descriptor. NOTE(review): no
40+
# interactive-mode check is made here, so the patch is applied on every
41+
# import unless CUDA_CORE_DONT_FIX_TAB_COMPLETION is set to a non-zero integer.
42+
import os
43+
44+
if int(os.environ.get("CUDA_CORE_DONT_FIX_TAB_COMPLETION", "0")):
45+
# Explicit opt-out for users who don't want the global rlcompleter
46+
# side effect, even in an interactive session.
47+
return
48+
49+
import rlcompleter
50+
from types import GetSetDescriptorType, MemberDescriptorType
51+
52+
# This works by shadowing the `property` built-in with a stand-in class, but
53+
# only inside the rlcompleter module. The stand-in does not subclass property;
54+
# instead its metaclass overrides `__instancecheck__` so that isinstance() also
55+
# returns True for getset_descriptor and member_descriptor objects, which are
56+
# what Cython uses for properties on cdef classes.
57+
class _PatchedPropMeta(type):
58+
def __instancecheck__(cls, inst):
59+
return isinstance(inst, (property, GetSetDescriptorType, MemberDescriptorType))
60+
61+
class _PatchedProperty(metaclass=_PatchedPropMeta):
62+
pass
63+
64+
rlcompleter.property = _PatchedProperty
65+
66+
67+
_patch_rlcompleter_for_cython_properties()
68+
del _patch_rlcompleter_for_cython_properties
69+
70+
3171
from cuda.core import checkpoint, system, utils
3272
from cuda.core._context import Context, ContextOptions
3373
from cuda.core._device import Device

cuda_core/cuda/core/_cpp/resource_handles.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ decltype(&cuLinkDestroy) p_cuLinkDestroy = nullptr;
7777
decltype(&cuGraphicsUnmapResources) p_cuGraphicsUnmapResources = nullptr;
7878
decltype(&cuGraphicsUnregisterResource) p_cuGraphicsUnregisterResource = nullptr;
7979

80+
// SM resource split (13.1+ — may be null on older drivers/bindings)
81+
#if CUDA_VERSION >= 13010
82+
decltype(&cuDevSmResourceSplit) p_cuDevSmResourceSplit = nullptr;
83+
#else
84+
void* p_cuDevSmResourceSplit = nullptr;
85+
#endif
86+
8087
// NVRTC function pointers
8188
decltype(&nvrtcDestroyProgram) p_nvrtcDestroyProgram = nullptr;
8289

@@ -1319,4 +1326,27 @@ FileDescriptorHandle create_fd_handle_ref(int fd) {
13191326
#endif
13201327
}
13211328

1329+
// ============================================================================
1330+
// SM resource split wrapper
1331+
// ============================================================================
1332+
1333+
CUresult sm_resource_split(CUdevResource* result, unsigned int nbGroups,
1334+
const CUdevResource* input, CUdevResource* remainder,
1335+
unsigned int flags, void* groupParams) {
1336+
#if CUDA_VERSION >= 13010
1337+
if (!p_cuDevSmResourceSplit) {
1338+
return CUDA_ERROR_NOT_SUPPORTED;
1339+
}
1340+
return p_cuDevSmResourceSplit(
1341+
result, nbGroups, input, remainder, flags,
1342+
static_cast<CU_DEV_SM_RESOURCE_GROUP_PARAMS*>(groupParams));
1343+
#else
1344+
return CUDA_ERROR_NOT_SUPPORTED;
1345+
#endif
1346+
}
1347+
1348+
bool has_sm_resource_split() noexcept {
1349+
return p_cuDevSmResourceSplit != nullptr;
1350+
}
1351+
13221352
} // namespace cuda_core

0 commit comments

Comments
 (0)