Skip to content

Commit f4b4b26

Browse files
committed
Merge remote-tracking branch 'upstream/main' into cuda.core.system
2 parents a764db5 + 492cc20 commit f4b4b26

16 files changed

Lines changed: 2466 additions & 911 deletions

File tree

.github/actions/doc_preview/action.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,9 @@ description: Preview or clean up docs built from PRs
1010
inputs:
1111
source-folder:
1212
required: true
13-
type: string
1413
description: "Source code directory"
1514
pr-number:
1615
required: true
17-
type: string
1816
description: "Pull request number"
1917

2018
runs:

.github/actions/fetch_ctk/action.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,15 @@ description: Fetch (or create) a mini CUDA Toolkit from cache
99
inputs:
1010
host-platform:
1111
required: true
12-
type: string
1312
cuda-version:
1413
required: true
15-
type: string
1614
cuda-components:
1715
description: "A list of the CTK components to install as a comma-separated list. e.g. 'cuda_nvcc,cuda_nvrtc,cuda_cudart'"
1816
required: false
19-
type: string
2017
default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,libnvjitlink,libcufile"
2118
cuda-path:
2219
description: "where the CTK components will be installed to, relative to $PWD"
2320
required: false
24-
type: string
2521
default: "./cuda_toolkit"
2622

2723
runs:

.github/actions/install_unix_deps/action.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,8 @@ description: Install needed dependencies, regardless if using GitHub- or self- h
99
inputs:
1010
dependencies:
1111
required: true
12-
type: string
1312
dependent_exes:
1413
required: true
15-
type: string
1614

1715
runs:
1816
using: composite

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ ci:
1515
# pre-commit autoupdate --freeze
1616
repos:
1717
- repo: https://github.com/astral-sh/ruff-pre-commit
18-
rev: 9c89adb347f6b973f4905a4be0051eb2ecf85dea # frozen: v0.13.3
18+
rev: 5ba58aca0bd5bc7c0e1c0fc45af2e88d6a2bde83 # frozen: v0.14.10
1919
hooks:
2020
- id: ruff-check
2121
args: [--fix, --show-fixes]
@@ -68,15 +68,15 @@ repos:
6868
- id: rst-inline-touching-normal
6969

7070
- repo: https://github.com/pre-commit/mirrors-mypy
71-
rev: 9f70dc58c23dfcca1b97af99eaeee3140a807c7e # frozen: v1.18.2
71+
rev: a66e98df7b4aeeb3724184b332785976d062b92e # frozen: v1.19.1
7272
hooks:
7373
- id: mypy
7474
name: mypy-pathfinder
7575
files: ^cuda_pathfinder/cuda/.*\.py$ # Exclude tests directory
7676
args: [--config-file=cuda_pathfinder/pyproject.toml]
7777

7878
- repo: https://github.com/rhysd/actionlint
79-
rev: "03d0035246f3e81f36aed592ffb4bebf33a03106" # frozen: v1.7.7
79+
rev: "0933c147c9d6587653d45fdcb4c497c57a65f9af" # frozen: v1.7.10
8080
hooks:
8181
- id: actionlint
8282
args: ["-shellcheck="]

cuda_bindings/pixi.lock

Lines changed: 2049 additions & 829 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cuda_bindings/pixi.toml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ pyglet = ">=2.1.9"
2121
numpy = "*"
2222

2323
[feature.cython-tests.dependencies]
24-
cython = ">=3.2,<3.3" # for tests that exercise APIs from cython
25-
setuptools = "*" # for distutils
26-
gxx = "*" # to compile the generated code
24+
cython = ">=3.2,<3.3" # for tests that exercise APIs from cython
25+
setuptools = "*" # for distutils
26+
gxx = "*" # to compile the generated code
2727
# These are necessary because running the Cython tests requires compiling
2828
# *after* the package is built, and the Cython tests depend on CUDA headers
2929
# transitively (through cuda-bindings)
@@ -57,7 +57,8 @@ cuda = "13"
5757
cuda-version = "13.1.*"
5858

5959
[environments]
60-
cu13 = { features = ["cu13", "test", "cython-tests"], solve-group = "cu13" }
60+
default = { features = ["cu13", "test", "cython-tests"] }
61+
cu13 = { features = ["cu13", "test", "cython-tests"] }
6162

6263
# TODO: check if these can be extracted from pyproject.toml
6364
[package]

cuda_core/build_hooks.py

Lines changed: 56 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import glob
1212
import os
1313
import re
14-
import subprocess
1514

1615
from Cython.Build import cythonize
1716
from setuptools import Extension
@@ -26,32 +25,60 @@
2625

2726

2827
@functools.cache
29-
def _get_proper_cuda_bindings_major_version() -> str:
30-
# for local development (with/without build isolation)
31-
try:
32-
import cuda.bindings
28+
def _get_cuda_paths() -> list[str]:
    """Return the CUDA installation roots configured for this build.

    Reads ``CUDA_PATH`` and falls back to ``CUDA_HOME`` when the former is
    unset or empty. The value may list several roots separated by
    ``os.pathsep``; empty entries (e.g. from a trailing separator) are dropped.

    Returns
    -------
    list[str]
        The non-empty root directories, in the order they were listed.

    Raises
    ------
    RuntimeError
        If neither variable provides at least one non-empty path.
    """
    # Use ``or`` rather than a nested ``get`` default so that an empty
    # CUDA_PATH does not shadow a valid CUDA_HOME.
    raw_value = os.environ.get("CUDA_PATH") or os.environ.get("CUDA_HOME")
    # Drop empty entries: they would otherwise turn into a bare, relative
    # "include" directory once joined by the caller.
    cuda_paths = [entry for entry in (raw_value or "").split(os.pathsep) if entry]
    if not cuda_paths:
        raise RuntimeError("Environment variable CUDA_PATH or CUDA_HOME is not set")
    print("CUDA paths:", cuda_paths)
    return cuda_paths
3335

34-
return cuda.bindings.__version__.split(".")[0]
35-
except ImportError:
36-
pass
3736

38-
# for custom overwrite, e.g. in CI
37+
# Matches the ``#define CUDA_VERSION <integer>`` line of cuda.h.
_CUDA_VERSION_DEFINE = re.compile(r"^#\s*define\s+CUDA_VERSION\s+(\d+)\s*$")


def _read_cuda_major_from_header(cuda_h: str) -> "str | None":
    """Return the CUDA major version defined in the header *cuda_h*.

    Returns ``None`` when the file cannot be opened/read or when it contains
    no ``CUDA_VERSION`` define.
    """
    try:
        # errors="replace": a stray non-UTF-8 byte elsewhere in the header
        # must not abort the scan for the (pure ASCII) define line.
        with open(cuda_h, encoding="utf-8", errors="replace") as f:
            for line in f:
                m = _CUDA_VERSION_DEFINE.match(line)
                if m:
                    # CUDA_VERSION is e.g. 12020 for 12.2.
                    return str(int(m.group(1)) // 1000)
    except OSError:
        return None
    return None


@functools.cache
def _determine_cuda_major_version() -> str:
    """Determine the CUDA major version for building cuda.core.

    This version is used for two purposes:

    1. Determining which cuda-bindings version to install as a build dependency
    2. Setting CUDA_CORE_BUILD_MAJOR for Cython compile-time conditionals

    The version is derived from (in order of priority):

    1. CUDA_CORE_BUILD_MAJOR environment variable (explicit override, e.g. in CI).
       Surrounding whitespace is ignored and an empty value counts as unset.
    2. CUDA_VERSION macro in cuda.h from CUDA_PATH or CUDA_HOME

    Since CUDA_PATH or CUDA_HOME is required for the build (to provide include
    directories), the cuda.h header should always be available.

    Returns
    -------
    str
        The major version as a decimal string, e.g. ``"13"``.

    Raises
    ------
    RuntimeError
        If no override is set and no readable ``include/cuda.h`` with a
        ``CUDA_VERSION`` define is found under the configured CUDA paths
        (or if those paths are not configured at all).
    """
    # Explicit override, e.g. in CI. A blank value is treated as "not set" so
    # that it cannot leak into int() or the cuda-bindings requirement string.
    cuda_major = os.environ.get("CUDA_CORE_BUILD_MAJOR", "").strip()
    if cuda_major:
        print("CUDA MAJOR VERSION:", cuda_major)
        return cuda_major

    # Derive from the CUDA headers (the authoritative source for what we compile against).
    for root in _get_cuda_paths():
        cuda_major = _read_cuda_major_from_header(os.path.join(root, "include", "cuda.h"))
        if cuda_major is not None:
            print("CUDA MAJOR VERSION:", cuda_major)
            return cuda_major

    # CUDA_PATH or CUDA_HOME is required for the build, so we should not reach here
    # in normal circumstances. Raise an error to make the issue clear.
    raise RuntimeError(
        "Cannot determine CUDA major version. "
        "Set CUDA_CORE_BUILD_MAJOR environment variable, or ensure CUDA_PATH or CUDA_HOME "
        "points to a valid CUDA installation with include/cuda.h."
    )
5582

5683

5784
# used later by setup()
@@ -68,25 +95,12 @@ def _build_cuda_core():
6895

6996
# It seems setuptools' wildcard support has problems for namespace packages,
7097
# so we explicitly spell out all Extension instances.
71-
root_module = "cuda.core"
72-
root_path = f"{os.path.sep}".join(root_module.split(".")) + os.path.sep
73-
ext_files = glob.glob(f"{root_path}/**/*.pyx", recursive=True)
74-
75-
def strip_prefix_suffix(filename):
76-
return filename[len(root_path) : -4]
77-
78-
module_names = (strip_prefix_suffix(f) for f in ext_files)
79-
80-
@functools.cache
81-
def get_cuda_paths():
82-
CUDA_PATH = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME", None))
83-
if not CUDA_PATH:
84-
raise RuntimeError("Environment variable CUDA_PATH or CUDA_HOME is not set")
85-
CUDA_PATH = CUDA_PATH.split(os.pathsep)
86-
print("CUDA paths:", CUDA_PATH)
87-
return CUDA_PATH
98+
def module_names():
99+
root_path = os.path.sep.join(["cuda", "core", ""])
100+
for filename in glob.glob(f"{root_path}/**/*.pyx", recursive=True):
101+
yield filename[len(root_path) : -4]
88102

89-
all_include_dirs = list(os.path.join(root, "include") for root in get_cuda_paths())
103+
all_include_dirs = list(os.path.join(root, "include") for root in _get_cuda_paths())
90104
extra_compile_args = []
91105
if COMPILE_FOR_COVERAGE:
92106
# CYTHON_TRACE_NOGIL indicates to trace nogil functions. It is not
@@ -101,11 +115,11 @@ def get_cuda_paths():
101115
language="c++",
102116
extra_compile_args=extra_compile_args,
103117
)
104-
for mod in module_names
118+
for mod in module_names()
105119
)
106120

107121
nthreads = int(os.environ.get("CUDA_PYTHON_PARALLEL_LEVEL", os.cpu_count() // 2))
108-
compile_time_env = {"CUDA_CORE_BUILD_MAJOR": int(_get_proper_cuda_bindings_major_version())}
122+
compile_time_env = {"CUDA_CORE_BUILD_MAJOR": int(_determine_cuda_major_version())}
109123
compiler_directives = {"embedsignature": True, "warn.deprecated.IF": False, "freethreading_compatible": True}
110124
if COMPILE_FOR_COVERAGE:
111125
compiler_directives["linetrace"] = True
@@ -132,7 +146,7 @@ def build_wheel(wheel_directory, config_settings=None, metadata_directory=None):
132146

133147

134148
def _get_cuda_bindings_require():
135-
cuda_major = _get_proper_cuda_bindings_major_version()
149+
cuda_major = _determine_cuda_major_version()
136150
return [f"cuda-bindings=={cuda_major}.*"]
137151

138152

cuda_core/cuda/core/_device.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ from cuda.bindings cimport cydriver
99
from cuda.core._utils.cuda_utils cimport HANDLE_RETURN
1010

1111
import threading
12-
from typing import Optional, TYPE_CHECKING, Union
12+
from typing import TYPE_CHECKING
1313

1414
from cuda.core._context import Context, ContextOptions
1515
from cuda.core._event import Event, EventOptions
@@ -1217,7 +1217,7 @@ class Device:
12171217
def __reduce__(self):
12181218
return Device, (self.device_id,)
12191219

1220-
def set_current(self, ctx: Context = None) -> Union[Context, None]:
1220+
def set_current(self, ctx: Context = None) -> Context | None:
12211221
"""Set device to be used for GPU executions.
12221222

12231223
Initializes CUDA and sets the calling thread to a valid CUDA
@@ -1233,7 +1233,7 @@ class Device:
12331233

12341234
Returns
12351235
-------
1236-
Union[:obj:`~_context.Context`, None], optional
1236+
:obj:`~_context.Context`, optional
12371237
Popped context.
12381238

12391239
Examples

cuda_core/cuda/core/_memoryview.pyx

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -137,22 +137,22 @@ cdef class StridedMemoryView:
137137

138138
@classmethod
139139
def from_dlpack(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
140-
cdef StridedMemoryView buf
141-
with warnings.catch_warnings():
142-
warnings.simplefilter("ignore")
143-
buf = cls()
140+
cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
144141
view_as_dlpack(obj, stream_ptr, buf)
145142
return buf
146143

147144
@classmethod
148145
def from_cuda_array_interface(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
149-
cdef StridedMemoryView buf
150-
with warnings.catch_warnings():
151-
warnings.simplefilter("ignore")
152-
buf = cls()
146+
cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
153147
view_as_cai(obj, stream_ptr, buf)
154148
return buf
155149

150+
@classmethod
def from_array_interface(cls, obj: object) -> StridedMemoryView:
    """Create a view of an object exposing ``__array_interface__``.

    A fresh instance of ``cls`` is allocated and handed to
    :func:`view_as_array_interface`, which fills it in and returns it.
    """
    cdef StridedMemoryView view = StridedMemoryView.__new__(cls)
    return view_as_array_interface(obj, view)
155+
156156
@classmethod
157157
def from_any_interface(cls, obj: object, stream_ptr: int | None = None) -> StridedMemoryView:
158158
if check_has_dlpack(obj):
@@ -597,6 +597,23 @@ cpdef StridedMemoryView view_as_cai(obj, stream_ptr, view=None):
597597
return buf
598598

599599

600+
cpdef StridedMemoryView view_as_array_interface(obj, view=None):
    """Populate a :obj:`StridedMemoryView` from ``obj.__array_interface__``.

    Parameters
    ----------
    obj : object
        Object exposing the NumPy ``__array_interface__`` attribute.
    view : StridedMemoryView, optional
        Existing view to fill in. A new one is allocated when omitted.

    Returns
    -------
    StridedMemoryView
        ``view`` if it was given, otherwise the newly allocated view.

    Raises
    ------
    BufferError
        If the interface version is below 3 or a mask is present.
    """
    cdef dict data = obj.__array_interface__
    if data["version"] < 3:
        raise BufferError("only NumPy Array Interface v3 or above is supported")
    if data.get("mask") is not None:
        raise BufferError("mask is not supported")

    # Allocate through __new__, like the from_* classmethods do, so that an
    # internally created view does not go through the public constructor.
    cdef StridedMemoryView buf = StridedMemoryView.__new__(StridedMemoryView) if view is None else view
    buf.exporting_obj = obj
    buf.metadata = data
    buf.dl_tensor = NULL
    # The "data" entry is unpacked as a (pointer, read-only flag) pair.
    buf.ptr, buf.readonly = data["data"]
    buf.is_device_accessible = False
    # NOTE(review): the memory is flagged as not device accessible, yet the
    # device id is queried from the current CUDA context; this presumably
    # requires a current context to exist -- confirm this is intended for
    # host arrays.
    buf.device_id = handle_return(driver.cuCtxGetDevice())
    return buf
615+
616+
600617
def args_viewable_as_strided_memory(tuple arg_indices):
601618
"""
602619
Decorator to create proxy objects to :obj:`StridedMemoryView` for the

0 commit comments

Comments
 (0)