NVIDIA
diff --git a/cuda_core/tests/graph/test_graph_memory_resource.py b/cuda_core/tests/graph/test_graph_memory_resource.py
Lines changed: 30 additions & 0 deletions
diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py
Lines changed: 83 additions & 52 deletions
diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py
Lines changed: 90 additions & 0 deletions
@@ -290,6 +290,36 @@ def test_gmr_check_capture_state(mempool_device, mode):
         gb.end_building().complete()
 
 
+def test_graph_memory_resource_attributes_direct_init_raises():
+    """Calling GraphMemoryResourceAttributes() with no arguments must raise RuntimeError."""
+    from cuda.core._memory._graph_memory_resource import GraphMemoryResourceAttributes
+
+    with pytest.raises(RuntimeError, match="cannot be instantiated directly"):  # match= is a regex search
+        GraphMemoryResourceAttributes()
+
+
+def test_graph_memory_resource_accessibility_flags(init_cuda):
+    """GraphMemoryResource(Device()) is device-accessible, not host-accessible, and reports that device's id."""
+    device = Device()
+    gmr = GraphMemoryResource(device)
+    assert gmr.is_device_accessible is True  # identity with True, not mere truthiness
+    assert gmr.is_host_accessible is False  # identity with False, not mere falsiness
+    assert gmr.device_id == int(device)
+
+
+def test_graph_memory_resource_attributes_repr(mempool_device):
+    """repr(gmr.attributes) looks like GraphMemoryResourceAttributes(...) and names the 4 memory-stat fields."""
+    device = mempool_device
+    gmr = GraphMemoryResource(device)
+    r = repr(gmr.attributes)
+    assert r.startswith("GraphMemoryResourceAttributes(")
+    assert r.endswith(")")
+    assert "reserved_mem_current=" in r  # only the "name=" prefix is checked, not the value
+    assert "reserved_mem_high=" in r
+    assert "used_mem_current=" in r
+    assert "used_mem_high=" in r
+
+
 @pytest.mark.parametrize("mode", ["global", "thread_local", "relaxed"])
 def test_dmr_check_capture_state(mempool_device, mode):
     """
 
@@ -125,6 +125,64 @@ def test_launch_config_native_conversion(init_cuda):
         pytest.skip("Driver or GPU not new enough for thread block clusters")
 
 
+def test_to_native_launch_config_no_cluster():
+    """No-cluster path of _to_native_launch_config: dims and shmem are copied, zero attrs; no Hopper+ required."""
+    from cuda.core._launch_config import _to_native_launch_config
+
+    config = LaunchConfig(grid=(4, 5, 6), block=(7, 8, 9), shmem_size=128)  # every dimension value is distinct
+    native = _to_native_launch_config(config)
+    assert native.gridDimX == 4, f"Expected gridDimX=4, got {native.gridDimX}"
+    assert native.gridDimY == 5, f"Expected gridDimY=5, got {native.gridDimY}"
+    assert native.gridDimZ == 6, f"Expected gridDimZ=6, got {native.gridDimZ}"
+    assert native.blockDimX == 7, f"Expected blockDimX=7, got {native.blockDimX}"
+    assert native.blockDimY == 8, f"Expected blockDimY=8, got {native.blockDimY}"
+    assert native.blockDimZ == 9, f"Expected blockDimZ=9, got {native.blockDimZ}"
+    assert native.sharedMemBytes == 128, f"Expected sharedMemBytes=128, got {native.sharedMemBytes}"
+    assert native.numAttrs == 0, f"Expected numAttrs=0, got {native.numAttrs}"
+    assert list(native.attrs) == [], f"Expected empty attrs, got {list(native.attrs)}"
+
+
+def test_launch_config_cooperative_unsupported(monkeypatch):
+    """LaunchConfig(is_cooperative=True) raises CUDAError when the mocked device lacks cooperative launch."""
+    from cuda.core import _launch_config as _lc_mod
+
+    class _FakeProps:  # stand-in for Device().properties
+        cooperative_launch = False
+
+    class _FakeDev:  # stand-in for the object returned by Device()
+        properties = _FakeProps()
+
+    monkeypatch.setattr(_lc_mod, "Device", lambda: _FakeDev())  # replaces _launch_config.Device for this test
+    with pytest.raises(CUDAError, match="cooperative kernels are not supported"):
+        LaunchConfig(grid=1, block=1, is_cooperative=True)
+
+
+def test_to_native_launch_config_cooperative(monkeypatch):
+    """is_cooperative branch of _to_native_launch_config yields exactly one COOPERATIVE attr; Device is mocked."""
+    from cuda.bindings import driver
+    from cuda.core import _launch_config as _lc_mod
+    from cuda.core._launch_config import _to_native_launch_config
+
+    class _FakeProps:  # stand-in for Device().properties
+        cooperative_launch = True
+
+    class _FakeDev:  # stand-in for the object returned by Device()
+        properties = _FakeProps()
+
+    monkeypatch.setattr(_lc_mod, "Device", lambda: _FakeDev())  # replaces _launch_config.Device for this test
+
+    config = LaunchConfig(grid=2, block=4, is_cooperative=True)
+    native = _to_native_launch_config(config)
+    assert native.gridDimX == 2
+    assert native.blockDimX == 4
+    assert native.numAttrs == 1  # the cooperative attribute is the only one expected
+    attr = native.attrs[0]
+    assert attr.id == driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_COOPERATIVE, (
+        f"Expected CU_LAUNCH_ATTRIBUTE_COOPERATIVE, got {attr.id}"
+    )
+    assert attr.value.cooperative == 1, f"Expected cooperative=1, got {attr.value.cooperative}"
+
+
 def test_launch_invalid_values(init_cuda):
     code = 'extern "C" __global__ void my_kernel() {}'
     program = Program(code, SourceCodeType.CXX)
@@ -403,28 +461,42 @@ class MyFloat(ctypes.c_float):
     class MyBool(ctypes.c_bool):
         pass
 
-    # These should NOT raise — they should be handled via isinstance fallback
+    # These should NOT raise; they should be handled via isinstance fallback
     holder = ParamHolder([MyInt32(42), MyFloat(3.14), MyBool(True)])
     assert holder.ptr != 0
 
 
 @requires_module(np, "2.1")
-def test_launch_scalar_argument_ctypes_subclass_fallback():
-    """Subclassed ctypes scalars survive the launch path and reach the kernel correctly."""
+@pytest.mark.parametrize(
+    ("scalar_kind", "np_dtype", "cpp_type", "raw_value"),
+    [
+        ("ctypes", np.int32, "signed int", -123456),
+        ("numpy", np.float32, "float", 3.14),
+    ],
+    ids=["ctypes_subclass", "numpy_subclass"],
+)
+def test_launch_scalar_argument_subclass_fallback(scalar_kind, np_dtype, cpp_type, raw_value):
+    """Subclassed scalar arguments survive fallback handling and reach the kernel."""
+    if scalar_kind == "ctypes":
 
-    class MyInt32(ctypes.c_int32):
-        pass
+        class Subclassed(ctypes.c_int32):
+            pass
+    else:
+
+        class Subclassed(np.float32):
+            pass
+
+    scalar = Subclassed(raw_value)
+    expected = np_dtype(raw_value)
 
     dev = Device()
     dev.set_current()
 
     mr = LegacyPinnedMemoryResource()
-    b = mr.allocate(np.dtype(np.int32).itemsize)
-    arr = np.from_dlpack(b).view(np.int32)
+    b = mr.allocate(np.dtype(np_dtype).itemsize)
+    arr = np.from_dlpack(b).view(np_dtype)
     arr[:] = 0
 
-    scalar = MyInt32(-123456)
-
     code = r"""
     template <typename T>
     __global__ void write_scalar(T* arr, T val) {
@@ -435,17 +507,16 @@ class MyInt32(ctypes.c_int32):
     arch = "".join(f"{i}" for i in dev.compute_capability)
     pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}")
     prog = Program(code, code_type="c++", options=pro_opts)
-    ker_name = "write_scalar<signed int>"
+    ker_name = f"write_scalar<{cpp_type}>"
     mod = prog.compile("cubin", name_expressions=(ker_name,))
     ker = mod.get_kernel(ker_name)
 
-    # This exercises the prepare_ctypes_arg isinstance fallback through a real launch.
     stream = dev.default_stream
     config = LaunchConfig(grid=1, block=1)
     launch(stream, config, ker, arr.ctypes.data, scalar)
     stream.sync()
 
-    assert arr[0] == scalar.value
+    assert arr[0] == expected
 
 
 def test_kernel_arg_numpy_subclass_isinstance_fallback():
@@ -462,46 +533,6 @@ class MyFloat32(np.float32):
     assert holder.ptr != 0
 
 
-@requires_module(np, "2.1")
-def test_launch_scalar_argument_numpy_subclass_fallback():
-    """Subclassed numpy scalars survive the launch path and reach the kernel correctly."""
-
-    class MyFloat32(np.float32):
-        pass
-
-    dev = Device()
-    dev.set_current()
-
-    mr = LegacyPinnedMemoryResource()
-    b = mr.allocate(np.dtype(np.float32).itemsize)
-    arr = np.from_dlpack(b).view(np.float32)
-    arr[:] = 0.0
-
-    scalar = MyFloat32(3.14)
-
-    code = r"""
-    template <typename T>
-    __global__ void write_scalar(T* arr, T val) {
-        arr[0] = val;
-    }
-    """
-
-    arch = "".join(f"{i}" for i in dev.compute_capability)
-    pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}")
-    prog = Program(code, code_type="c++", options=pro_opts)
-    ker_name = "write_scalar<float>"
-    mod = prog.compile("cubin", name_expressions=(ker_name,))
-    ker = mod.get_kernel(ker_name)
-
-    # This exercises the prepare_numpy_arg isinstance fallback through a real launch.
-    stream = dev.default_stream
-    config = LaunchConfig(grid=1, block=1)
-    launch(stream, config, ker, arr.ctypes.data, scalar)
-    stream.sync()
-
-    assert arr[0] == scalar
-
-
 def test_kernel_arg_python_isinstance_fallbacks():
     """Subclassed Python builtins hit the isinstance fallback in ParamHolder."""
     from cuda.core._kernel_arg_handler import ParamHolder
 
@@ -280,3 +280,93 @@ def test_which_backend_is_not_property(self):
         """
         attr = inspect.getattr_static(Linker, "which_backend")
         assert not isinstance(attr, property)
+
+
+@pytest.fixture
+def driver_binding(monkeypatch):
+    """Fixture: point _linker._driver at cuda.bindings.driver for the test and return that module."""
+    from cuda.bindings import driver
+
+    monkeypatch.setattr(_linker, "_driver", driver)  # lets driver-backend tests run under any backend
+    return driver
+
+
+def test_prepare_driver_options_all_supported(driver_binding):
+    """Each of the eight options set below maps to its CUjit_option key and value in _prepare_driver_options."""
+    driver = driver_binding
+    opts = LinkerOptions(
+        arch="sm_80",
+        max_register_count=32,
+        verbose=True,
+        link_time_optimization=True,
+        optimization_level=2,
+        debug=True,
+        lineinfo=True,
+        no_cache=True,
+    )
+    formatted, keys = opts._prepare_driver_options()  # parallel sequences: values and their option keys
+    assert len(formatted) == len(keys)
+    assert len(keys) == 4 + 8  # 4 fixed log-buffer entries + 8 options set above
+
+    # Skip log-buffer entries; verify key-to-value mapping (catches swap/dup/wrong-value).
+    payload_keys = keys[4:]
+    assert len(set(payload_keys)) == len(payload_keys), f"duplicate option keys: {payload_keys}"
+    option_to_value = dict(zip(payload_keys, formatted[4:]))
+    assert option_to_value[driver.CUjit_option.CU_JIT_TARGET] == driver.CUjit_target.CU_TARGET_COMPUTE_80
+    assert option_to_value[driver.CUjit_option.CU_JIT_MAX_REGISTERS] == 32
+    assert option_to_value[driver.CUjit_option.CU_JIT_LOG_VERBOSE] == 1
+    assert option_to_value[driver.CUjit_option.CU_JIT_LTO] == 1
+    assert option_to_value[driver.CUjit_option.CU_JIT_OPTIMIZATION_LEVEL] == 2
+    assert option_to_value[driver.CUjit_option.CU_JIT_GENERATE_DEBUG_INFO] == 1
+    assert option_to_value[driver.CUjit_option.CU_JIT_GENERATE_LINE_INFO] == 1
+    assert option_to_value[driver.CUjit_option.CU_JIT_CACHE_MODE] == driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_NONE
+
+
+@pytest.mark.parametrize(
+    "kwargs,match",
+    [
+        ({"ftz": True}, "ftz option is deprecated"),
+        ({"prec_div": True}, "prec_div option is deprecated"),
+        ({"prec_sqrt": True}, "prec_sqrt option is deprecated"),
+        ({"fma": True}, "fma options is deprecated"),  # NOTE(review): "options" [sic]; confirm against warning text
+        ({"kernels_used": "my_kernel"}, "kernels_used is deprecated"),
+        ({"variables_used": "my_var"}, "variables_used is deprecated"),
+        ({"optimize_unused_variables": True}, "optimize_unused_variables is deprecated"),
+    ],
+)
+def test_prepare_driver_options_deprecated_warnings(driver_binding, kwargs, match):
+    """_prepare_driver_options emits a DeprecationWarning matching `match` for each driver-deprecated option."""
+    opts = LinkerOptions(**kwargs)
+    with pytest.warns(DeprecationWarning, match=match):  # construction happens outside the warns block
+        opts._prepare_driver_options()
+
+
+@pytest.mark.parametrize(
+    "kwargs,match",
+    [
+        ({"time": True}, "time option is not supported by the driver API"),
+        ({"ptx": True}, "ptx option is not supported by the driver API"),
+        ({"ptxas_options": ["-v"]}, "ptxas_options option is not supported by the driver API"),
+        ({"split_compile": 0}, "split_compile option is not supported by the driver API"),
+        ({"split_compile_extended": 1}, "split_compile_extended option is not supported by the driver API"),
+    ],
+)
+def test_prepare_driver_options_unsupported_raises(driver_binding, kwargs, match):
+    """Each nvJitLink-only option makes _prepare_driver_options raise ValueError on the driver backend."""
+    opts = LinkerOptions(**kwargs)
+    with pytest.raises(ValueError, match=match):  # construction happens outside the raises block
+        opts._prepare_driver_options()
+
+
+def test_linker_empty_object_codes_raises():
+    """Linker() called with no ObjectCode arguments raises ValueError."""
+    with pytest.raises(ValueError, match="At least one ObjectCode object must be provided"):
+        Linker()
+
+
+def test_as_bytes_nvjitlink_unavailable(monkeypatch):
+    """LinkerOptions.as_bytes("nvjitlink") raises RuntimeError if _linker._use_nvjitlink_backend is False."""
+    monkeypatch.setattr(_linker, "_use_nvjitlink_backend", False)  # simulate the backend being unavailable
+    opts = LinkerOptions(arch="sm_80")
+    with pytest.raises(RuntimeError, match="nvJitLink backend is not available"):
+        opts.as_bytes("nvjitlink")