Skip to content

Commit e350c5a

Browse files
authored
coverage: add tests for memory, launcher, linker, program, and utils coverage gaps (#2130)
* coverage: add tests for memory, launcher, linker, program, and utils coverage gaps * coverage: add tests for memory, launcher, linker, program, and utils coverage gaps * coverage: mock VMM fast-path test, parametrize scalar-subclass launch tests
1 parent 96c7f51 commit e350c5a

6 files changed

Lines changed: 507 additions & 132 deletions

File tree

cuda_core/tests/graph/test_graph_memory_resource.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -290,6 +290,36 @@ def test_gmr_check_capture_state(mempool_device, mode):
290290
gb.end_building().complete()
291291

292292

293+
def test_graph_memory_resource_attributes_direct_init_raises():
    """Calling the GraphMemoryResourceAttributes constructor directly must raise RuntimeError."""
    from cuda.core._memory._graph_memory_resource import GraphMemoryResourceAttributes as attrs_cls

    # The error text is matched as a regex fragment by pytest.raises.
    expected_message = "cannot be instantiated directly"
    with pytest.raises(RuntimeError, match=expected_message):
        attrs_cls()
299+
300+
301+
def test_graph_memory_resource_accessibility_flags(init_cuda):
    """Check the accessibility flags and device_id reported by a GraphMemoryResource.

    The resource built from ``Device()`` must report device access as exactly
    ``True``, host access as exactly ``False``, and a ``device_id`` equal to
    ``int(device)``.
    """
    dev = Device()
    resource = GraphMemoryResource(dev)

    # Identity checks (``is``) so truthy/falsy non-bool values do not pass.
    assert resource.is_device_accessible is True
    assert resource.is_host_accessible is False
    assert resource.device_id == int(dev)
308+
309+
310+
def test_graph_memory_resource_attributes_repr(mempool_device):
    """The repr of GraphMemoryResource.attributes names the class and lists four memory counters."""
    text = repr(GraphMemoryResource(mempool_device).attributes)

    # The repr must be wrapped as ``GraphMemoryResourceAttributes(...)``.
    assert text.startswith("GraphMemoryResourceAttributes(")
    assert text.endswith(")")

    # Each counter must appear as a ``name=`` fragment somewhere in the repr.
    required_fragments = (
        "reserved_mem_current=",
        "reserved_mem_high=",
        "used_mem_current=",
        "used_mem_high=",
    )
    for fragment in required_fragments:
        assert fragment in text
321+
322+
293323
@pytest.mark.parametrize("mode", ["global", "thread_local", "relaxed"])
294324
def test_dmr_check_capture_state(mempool_device, mode):
295325
"""

cuda_core/tests/test_launcher.py

Lines changed: 83 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,64 @@ def test_launch_config_native_conversion(init_cuda):
125125
pytest.skip("Driver or GPU not new enough for thread block clusters")
126126

127127

128+
def test_to_native_launch_config_no_cluster():
    """Check the native config built from a LaunchConfig that sets only grid, block, and shmem_size.

    Every grid/block dimension and the shared-memory size must carry over
    unchanged, and the native config must hold no launch attributes.
    """
    from cuda.core._launch_config import _to_native_launch_config

    native = _to_native_launch_config(LaunchConfig(grid=(4, 5, 6), block=(7, 8, 9), shmem_size=128))

    # (native field name, expected value) pairs, checked in declaration order.
    expected_fields = (
        ("gridDimX", 4),
        ("gridDimY", 5),
        ("gridDimZ", 6),
        ("blockDimX", 7),
        ("blockDimY", 8),
        ("blockDimZ", 9),
        ("sharedMemBytes", 128),
        ("numAttrs", 0),
    )
    for field, want in expected_fields:
        got = getattr(native, field)
        assert got == want, f"Expected {field}={want}, got {got}"

    attrs = list(native.attrs)
    assert attrs == [], f"Expected empty attrs, got {attrs}"
143+
144+
145+
def test_launch_config_cooperative_unsupported(monkeypatch):
    """Building a cooperative LaunchConfig raises CUDAError when the device reports no support.

    ``Device`` in the ``_launch_config`` module is replaced with a stub whose
    ``properties.cooperative_launch`` is ``False``.
    """
    from cuda.core import _launch_config as launch_config_module

    class _NoCoopProps:
        cooperative_launch = False

    class _NoCoopDevice:
        properties = _NoCoopProps()

    def _make_device():
        # Zero-argument stand-in for the Device() call.
        return _NoCoopDevice()

    monkeypatch.setattr(launch_config_module, "Device", _make_device)

    with pytest.raises(CUDAError, match="cooperative kernels are not supported"):
        LaunchConfig(grid=1, block=1, is_cooperative=True)
158+
159+
160+
def test_to_native_launch_config_cooperative(monkeypatch):
    """A cooperative LaunchConfig converts to a native config with one cooperative attribute.

    ``Device`` in the ``_launch_config`` module is replaced with a stub whose
    ``properties.cooperative_launch`` is ``True``, so the test does not depend
    on what the real device reports.
    """
    from cuda.bindings import driver
    from cuda.core import _launch_config as launch_config_module
    from cuda.core._launch_config import _to_native_launch_config as to_native

    class _CoopProps:
        cooperative_launch = True

    class _CoopDevice:
        properties = _CoopProps()

    def _make_device():
        # Zero-argument stand-in for the Device() call.
        return _CoopDevice()

    monkeypatch.setattr(launch_config_module, "Device", _make_device)

    native = to_native(LaunchConfig(grid=2, block=4, is_cooperative=True))

    assert native.gridDimX == 2
    assert native.blockDimX == 4
    assert native.numAttrs == 1

    cooperative_id = driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_COOPERATIVE
    attr = native.attrs[0]
    assert attr.id == cooperative_id, f"Expected CU_LAUNCH_ATTRIBUTE_COOPERATIVE, got {attr.id}"
    assert attr.value.cooperative == 1, f"Expected cooperative=1, got {attr.value.cooperative}"
184+
185+
128186
def test_launch_invalid_values(init_cuda):
129187
code = 'extern "C" __global__ void my_kernel() {}'
130188
program = Program(code, SourceCodeType.CXX)
@@ -403,28 +461,42 @@ class MyFloat(ctypes.c_float):
403461
class MyBool(ctypes.c_bool):
404462
pass
405463

406-
# These should NOT raise they should be handled via isinstance fallback
464+
# These should NOT raise; they should be handled via isinstance fallback
407465
holder = ParamHolder([MyInt32(42), MyFloat(3.14), MyBool(True)])
408466
assert holder.ptr != 0
409467

410468

411469
@requires_module(np, "2.1")
412-
def test_launch_scalar_argument_ctypes_subclass_fallback():
413-
"""Subclassed ctypes scalars survive the launch path and reach the kernel correctly."""
470+
@pytest.mark.parametrize(
471+
("scalar_kind", "np_dtype", "cpp_type", "raw_value"),
472+
[
473+
("ctypes", np.int32, "signed int", -123456),
474+
("numpy", np.float32, "float", 3.14),
475+
],
476+
ids=["ctypes_subclass", "numpy_subclass"],
477+
)
478+
def test_launch_scalar_argument_subclass_fallback(scalar_kind, np_dtype, cpp_type, raw_value):
479+
"""Subclassed scalar arguments survive fallback handling and reach the kernel."""
480+
if scalar_kind == "ctypes":
414481

415-
class MyInt32(ctypes.c_int32):
416-
pass
482+
class Subclassed(ctypes.c_int32):
483+
pass
484+
else:
485+
486+
class Subclassed(np.float32):
487+
pass
488+
489+
scalar = Subclassed(raw_value)
490+
expected = np_dtype(raw_value)
417491

418492
dev = Device()
419493
dev.set_current()
420494

421495
mr = LegacyPinnedMemoryResource()
422-
b = mr.allocate(np.dtype(np.int32).itemsize)
423-
arr = np.from_dlpack(b).view(np.int32)
496+
b = mr.allocate(np.dtype(np_dtype).itemsize)
497+
arr = np.from_dlpack(b).view(np_dtype)
424498
arr[:] = 0
425499

426-
scalar = MyInt32(-123456)
427-
428500
code = r"""
429501
template <typename T>
430502
__global__ void write_scalar(T* arr, T val) {
@@ -435,17 +507,16 @@ class MyInt32(ctypes.c_int32):
435507
arch = "".join(f"{i}" for i in dev.compute_capability)
436508
pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}")
437509
prog = Program(code, code_type="c++", options=pro_opts)
438-
ker_name = "write_scalar<signed int>"
510+
ker_name = f"write_scalar<{cpp_type}>"
439511
mod = prog.compile("cubin", name_expressions=(ker_name,))
440512
ker = mod.get_kernel(ker_name)
441513

442-
# This exercises the prepare_ctypes_arg isinstance fallback through a real launch.
443514
stream = dev.default_stream
444515
config = LaunchConfig(grid=1, block=1)
445516
launch(stream, config, ker, arr.ctypes.data, scalar)
446517
stream.sync()
447518

448-
assert arr[0] == scalar.value
519+
assert arr[0] == expected
449520

450521

451522
def test_kernel_arg_numpy_subclass_isinstance_fallback():
@@ -462,46 +533,6 @@ class MyFloat32(np.float32):
462533
assert holder.ptr != 0
463534

464535

465-
@requires_module(np, "2.1")
466-
def test_launch_scalar_argument_numpy_subclass_fallback():
467-
"""Subclassed numpy scalars survive the launch path and reach the kernel correctly."""
468-
469-
class MyFloat32(np.float32):
470-
pass
471-
472-
dev = Device()
473-
dev.set_current()
474-
475-
mr = LegacyPinnedMemoryResource()
476-
b = mr.allocate(np.dtype(np.float32).itemsize)
477-
arr = np.from_dlpack(b).view(np.float32)
478-
arr[:] = 0.0
479-
480-
scalar = MyFloat32(3.14)
481-
482-
code = r"""
483-
template <typename T>
484-
__global__ void write_scalar(T* arr, T val) {
485-
arr[0] = val;
486-
}
487-
"""
488-
489-
arch = "".join(f"{i}" for i in dev.compute_capability)
490-
pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}")
491-
prog = Program(code, code_type="c++", options=pro_opts)
492-
ker_name = "write_scalar<float>"
493-
mod = prog.compile("cubin", name_expressions=(ker_name,))
494-
ker = mod.get_kernel(ker_name)
495-
496-
# This exercises the prepare_numpy_arg isinstance fallback through a real launch.
497-
stream = dev.default_stream
498-
config = LaunchConfig(grid=1, block=1)
499-
launch(stream, config, ker, arr.ctypes.data, scalar)
500-
stream.sync()
501-
502-
assert arr[0] == scalar
503-
504-
505536
def test_kernel_arg_python_isinstance_fallbacks():
506537
"""Subclassed Python builtins hit the isinstance fallback in ParamHolder."""
507538
from cuda.core._kernel_arg_handler import ParamHolder

cuda_core/tests/test_linker.py

Lines changed: 90 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -280,3 +280,93 @@ def test_which_backend_is_not_property(self):
280280
"""
281281
attr = inspect.getattr_static(Linker, "which_backend")
282282
assert not isinstance(attr, property)
283+
284+
285+
@pytest.fixture
def driver_binding(monkeypatch):
    """Point ``_linker._driver`` at ``cuda.bindings.driver`` for the test and return that module."""
    from cuda.bindings import driver as real_driver

    # monkeypatch undoes the attribute swap at teardown.
    monkeypatch.setattr(_linker, "_driver", real_driver)
    return real_driver
292+
293+
294+
def test_prepare_driver_options_all_supported(driver_binding):
    """Set eight options on LinkerOptions and verify the driver key/value pairs they produce."""
    driver = driver_binding
    jit_option = driver.CUjit_option

    option_kwargs = {
        "arch": "sm_80",
        "max_register_count": 32,
        "verbose": True,
        "link_time_optimization": True,
        "optimization_level": 2,
        "debug": True,
        "lineinfo": True,
        "no_cache": True,
    }
    formatted, keys = LinkerOptions(**option_kwargs)._prepare_driver_options()

    assert len(formatted) == len(keys)
    assert len(keys) == 4 + 8  # 4 fixed log-buffer entries + 8 options set above

    # Skip log-buffer entries; verify key-to-value mapping (catches swap/dup/wrong-value).
    payload_keys = keys[4:]
    payload_values = formatted[4:]
    assert len(set(payload_keys)) == len(payload_keys), f"duplicate option keys: {payload_keys}"
    option_to_value = dict(zip(payload_keys, payload_values))

    expected = {
        jit_option.CU_JIT_TARGET: driver.CUjit_target.CU_TARGET_COMPUTE_80,
        jit_option.CU_JIT_MAX_REGISTERS: 32,
        jit_option.CU_JIT_LOG_VERBOSE: 1,
        jit_option.CU_JIT_LTO: 1,
        jit_option.CU_JIT_OPTIMIZATION_LEVEL: 2,
        jit_option.CU_JIT_GENERATE_DEBUG_INFO: 1,
        jit_option.CU_JIT_GENERATE_LINE_INFO: 1,
        jit_option.CU_JIT_CACHE_MODE: driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_NONE,
    }
    # Indexing (not .get) so a missing key fails loudly with KeyError.
    for key, want in expected.items():
        assert option_to_value[key] == want
323+
324+
325+
@pytest.mark.parametrize(
    "kwargs,match",
    [
        ({"ftz": True}, "ftz option is deprecated"),
        ({"prec_div": True}, "prec_div option is deprecated"),
        ({"prec_sqrt": True}, "prec_sqrt option is deprecated"),
        ({"fma": True}, "fma options is deprecated"),
        ({"kernels_used": "my_kernel"}, "kernels_used is deprecated"),
        ({"variables_used": "my_var"}, "variables_used is deprecated"),
        ({"optimize_unused_variables": True}, "optimize_unused_variables is deprecated"),
    ],
)
def test_prepare_driver_options_deprecated_warnings(driver_binding, kwargs, match):
    """Preparing driver options with the given option set emits a matching DeprecationWarning.

    Only ``_prepare_driver_options()`` runs inside the ``pytest.warns`` block;
    the ``LinkerOptions`` object is built before it.
    """
    options = LinkerOptions(**kwargs)

    with pytest.warns(DeprecationWarning, match=match):
        options._prepare_driver_options()
342+
343+
344+
@pytest.mark.parametrize(
    "kwargs,match",
    [
        ({"time": True}, "time option is not supported by the driver API"),
        ({"ptx": True}, "ptx option is not supported by the driver API"),
        ({"ptxas_options": ["-v"]}, "ptxas_options option is not supported by the driver API"),
        ({"split_compile": 0}, "split_compile option is not supported by the driver API"),
        ({"split_compile_extended": 1}, "split_compile_extended option is not supported by the driver API"),
    ],
)
def test_prepare_driver_options_unsupported_raises(driver_binding, kwargs, match):
    """Preparing driver options with the given option set raises a matching ValueError.

    Only ``_prepare_driver_options()`` runs inside the ``pytest.raises`` block;
    the ``LinkerOptions`` object is built before it.
    """
    options = LinkerOptions(**kwargs)

    with pytest.raises(ValueError, match=match):
        options._prepare_driver_options()
359+
360+
361+
def test_linker_empty_object_codes_raises():
    """Calling ``Linker()`` with no arguments raises ValueError asking for at least one ObjectCode."""
    expected_message = "At least one ObjectCode object must be provided"
    with pytest.raises(ValueError, match=expected_message):
        Linker()
365+
366+
367+
def test_as_bytes_nvjitlink_unavailable(monkeypatch):
    """``as_bytes("nvjitlink")`` raises RuntimeError when ``_linker._use_nvjitlink_backend`` is False."""
    # Set the module flag to False for this test only; monkeypatch restores it.
    monkeypatch.setattr(_linker, "_use_nvjitlink_backend", False)

    options = LinkerOptions(arch="sm_80")
    expected_message = "nvJitLink backend is not available"
    with pytest.raises(RuntimeError, match=expected_message):
        options.as_bytes("nvjitlink")

0 commit comments

Comments
 (0)