Skip to content

Commit 788bbad

Browse files
authored
coverage: add cuda.core tests for host, launcher cluster, strided layout, program-cache, and DLPack utils (#2168)
* coverage: add cuda.core tests for host, launcher cluster, strided layout, program-cache, and DLPack utils
* coverage: drop test_utils.py and test_strided_layout.py changes
1 parent 4bb52eb commit 788bbad

3 files changed

Lines changed: 264 additions & 0 deletions

File tree

cuda_core/tests/test_host.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,19 @@ def test_eq_hash(self):
5353
assert Host() != Host(numa_id=0)
5454
assert Host.numa_current() != Host()
5555
assert hash(Host(numa_id=1)) == hash(Host(numa_id=1))
56+
57+
def test_repr(self):
    """``repr`` of a Host mirrors the expression used to obtain it."""
    expected_by_host = (
        (Host(), "Host()"),
        (Host(numa_id=2), "Host(numa_id=2)"),
        (Host.numa_current(), "Host.numa_current()"),
    )
    for host, expected in expected_by_host:
        assert repr(host) == expected
61+
62+
def test_pickle_roundtrip_preserves_singleton(self):
    """Pickle, ``copy.copy`` and ``copy.deepcopy`` each return the very same Host object."""
    # __reduce__ routes numa_current through _reconstruct_numa_current and
    # the others through Host(numa_id); both rebuild the same singleton.
    # copy.copy / copy.deepcopy share the same __reduce__ machinery.
    import copy
    import pickle

    for h in (Host(), Host(numa_id=4), Host.numa_current()):
        assert pickle.loads(pickle.dumps(h)) is h  # noqa: S301
        assert copy.copy(h) is h
        # Check deepcopy explicitly so the claim above is actually verified.
        assert copy.deepcopy(h) is h

cuda_core/tests/test_launcher.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,76 @@ class _FakeDev:
183183
assert attr.value.cooperative == 1, f"Expected cooperative=1, got {attr.value.cooperative}"
184184

185185

186+
def test_launch_config_cluster_accepts_hopper_cc(monkeypatch):
    """``cluster`` is accepted when the device reports compute capability >= 9.0.

    The device is mocked so the cluster-cast branch runs on any GPU (real
    cluster support otherwise requires Hopper+).
    """
    from cuda.core import _launch_config as _lc_mod

    class _FakeDev:
        compute_capability = (9, 0)

    # Non-empty after construction only if the patched factory was really used.
    lookups = []

    def _fake_device():
        lookups.append(1)
        return _FakeDev()

    monkeypatch.setattr(_lc_mod, "Device", _fake_device)

    config = LaunchConfig(grid=(2, 3), cluster=(2, 2), block=32)

    assert lookups, "Device was not looked up via the module global; mock did not take effect"
    assert config.cluster == (2, 2, 1)
    assert config.grid == (2, 3, 1)
203+
204+
205+
def test_launch_config_cluster_rejects_pre_hopper_cc(monkeypatch):
    """``LaunchConfig(cluster=...)`` raises when the device's compute capability is below 9.0."""
    from cuda.core import _launch_config as _lc_mod

    class _FakeDev:
        compute_capability = (8, 6)

    # Non-empty after construction only if the patched factory was really used.
    lookups = []

    def _fake_device():
        lookups.append(1)
        return _FakeDev()

    monkeypatch.setattr(_lc_mod, "Device", _fake_device)

    with pytest.raises(CUDAError, match="thread block clusters are not supported"):
        LaunchConfig(grid=2, cluster=2, block=32)

    assert lookups, "Device was not looked up via the module global; mock did not take effect"
219+
220+
221+
def test_to_native_launch_config_cluster_branch():
    """Exercise the cluster branch of ``_to_native_launch_config`` without Hopper.

    With a cluster set, the grid is converted from cluster units to block
    units and a cluster-dimension launch attribute is attached.

    The cc gate lives in ``LaunchConfig.__init__``; ``cluster`` itself is a
    public attribute, so assigning it on a cluster-free config yields the
    exact object ``__init__`` would build on Hopper and lets the conversion
    run on any GPU.

    Note: this targets the standalone ``cpdef _to_native_launch_config``
    function (a duplicate of the ``LaunchConfig._to_native_launch_config``
    cdef method, slated for removal once all modules are cythonized), not the
    cdef method that ``launch`` / ``Module`` actually call in production.
    """
    from cuda.bindings import driver
    from cuda.core._launch_config import _to_native_launch_config

    config = LaunchConfig(grid=(2, 3, 4), block=(5, 6, 7))
    config.cluster = (2, 2, 2)
    native = _to_native_launch_config(config)

    # grid (in cluster units) * cluster -> block units
    grid_dims = (native.gridDimX, native.gridDimY, native.gridDimZ)
    assert grid_dims == (4, 6, 8)

    block_dims = (native.blockDimX, native.blockDimY, native.blockDimZ)
    assert block_dims == (5, 6, 7)

    assert native.numAttrs == 1
    attr = native.attrs[0]
    assert attr.id == driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
    cluster_dim = attr.value.clusterDim
    assert (cluster_dim.x, cluster_dim.y, cluster_dim.z) == (2, 2, 2)
254+
255+
186256
def test_launch_invalid_values(init_cuda):
187257
code = 'extern "C" __global__ void my_kernel() {}'
188258
program = Program(code, SourceCodeType.CXX)

cuda_core/tests/test_program_cache.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,6 +2000,184 @@ def thread_b():
20002000
assert cache._tracked_size_bytes == 0, f"tracker went negative: {cache._tracked_size_bytes}"
20012001

20022002

2003+
def test_filestream_delitem_missing_key_with_cap_raises_keyerror(tmp_path):
    """Deleting an absent key under an active size cap raises ``KeyError``.

    The error comes from the stat-before-unlink miss branch of
    ``__delitem__`` (so the tracker stays correct).
    """
    from cuda.core.utils import FileStreamProgramCache

    cache_dir = tmp_path / "fc"
    with FileStreamProgramCache(cache_dir, max_size_bytes=1000) as cache:
        with pytest.raises(KeyError):
            del cache[b"absent"]
2010+
2011+
2012+
def test_filestream_clear_with_cap_resets_tracker(tmp_path):
    """With a size cap active, ``clear()`` re-derives the size tracker from
    the post-clear disk state."""
    from cuda.core.utils import FileStreamProgramCache

    with FileStreamProgramCache(tmp_path / "fc", max_size_bytes=10_000) as cache:
        # Two 100-byte payloads, stored in the same order as keys b"a", b"b".
        for key in (b"a", b"b"):
            cache[key] = key * 100
        assert len(cache) == 2
        assert cache._tracked_size_bytes == 200

        cache.clear()

        assert len(cache) == 0
        assert cache._tracked_size_bytes == 0
2026+
2027+
2028+
def test_filestream_iter_entry_paths_skips_stray_top_level_file(tmp_path):
    """A plain file dropped directly into ``entries/`` is ignored; only the
    two-level digest shards hold real entries."""
    from cuda.core.utils import FileStreamProgramCache

    with FileStreamProgramCache(tmp_path / "fc") as cache:
        cache[b"k"] = b"v"
        (cache._entries / "not-a-shard").write_bytes(b"junk")
        # Only the real entry is counted; the stray file must not show up.
        entry_count = len(cache)
        assert entry_count == 1
2039+
2040+
2041+
def test_filestream_iter_entry_paths_returns_when_entries_dir_missing(tmp_path):
    """If ``entries/`` vanishes, ``_iter_entry_paths`` returns cleanly (len 0)."""
    import shutil

    from cuda.core.utils import FileStreamProgramCache

    with FileStreamProgramCache(tmp_path / "fc") as cache:
        cache[b"k"] = b"v"
        entries_dir = cache._entries
        shutil.rmtree(entries_dir)
        assert len(cache) == 0
2051+
2052+
2053+
def test_filestream_sum_tmp_sizes_returns_zero_when_tmp_dir_missing(tmp_path):
    """With ``tmp/`` gone, ``_sum_tmp_sizes`` (via ``_iter_tmp_entries``) reports 0."""
    import shutil

    from cuda.core.utils import FileStreamProgramCache

    with FileStreamProgramCache(tmp_path / "fc") as cache:
        tmp_dir = cache._tmp
        shutil.rmtree(tmp_dir)
        total = cache._sum_tmp_sizes()
        assert total == 0
2062+
2063+
2064+
def test_filestream_enforce_size_cap_noop_without_cap(tmp_path):
    """Without a configured size cap, ``_enforce_size_cap`` returns immediately."""
    from cuda.core.utils import FileStreamProgramCache

    # No max_size_bytes argument, i.e. max_size_bytes=None.
    with FileStreamProgramCache(tmp_path / "fc") as cache:
        cache[b"k"] = b"v"
        # Must neither raise nor evict the single stored entry.
        cache._enforce_size_cap()
        remaining = len(cache)
        assert remaining == 1
2072+
2073+
2074+
def test_filestream_touch_atime_path_fallback_swallows_stat_failure(tmp_path, monkeypatch):
    """The path-based fallback (the Windows code path) swallows a failing
    ``path.stat()``.

    ``_touch_atime`` returns without raising and without calling
    ``os.utime`` -- the entry just isn't re-stamped.
    """
    import os as _os

    from cuda.core.utils import FileStreamProgramCache, _program_cache
    from cuda.core.utils._program_cache._file_stream import _touch_atime

    # Force the path-based branch regardless of the host platform.
    monkeypatch.setattr(_program_cache._file_stream, "_UTIME_SUPPORTS_FD", False)
    with FileStreamProgramCache(tmp_path / "fc") as cache:
        cache[b"k"] = b"v"
        entry_path = cache._path_for_key(b"k")
        stat_before = entry_path.stat()
        # With the file gone, the fallback's re-stat raises FileNotFoundError
        # (an OSError).
        entry_path.unlink()

        recorded_utime = []

        def _record_utime(*args, **kwargs):
            recorded_utime.append((args, kwargs))

        monkeypatch.setattr(_os, "utime", _record_utime)

        # Best-effort: the failing stat is swallowed -- no exception, no utime.
        outcome = _touch_atime(entry_path, stat_before)
        assert outcome is None
        assert not recorded_utime, "os.utime must not run when path.stat() fails"
2096+
2097+
2098+
def test_filestream_touch_atime_swallows_open_failure(tmp_path, monkeypatch):
    """An ``os.open`` failure inside the best-effort atime bump is swallowed.

    The read still returns the cached bytes and ``os.utime`` is never reached.
    """
    import os as _os

    from cuda.core.utils import FileStreamProgramCache, _program_cache

    monkeypatch.setattr(_program_cache._file_stream, "_UTIME_SUPPORTS_FD", True)
    with FileStreamProgramCache(tmp_path / "fc") as cache:
        cache[b"k"] = b"v"
        entry_path = cache._path_for_key(b"k")

        # Fail only this entry's atime-bump open; let other os.open calls pass
        # through so a broken read can't masquerade as the swallowed failure.
        original_open = _os.open
        refused_paths = []

        def _failing_open(path, flags, *args, **kwargs):
            is_entry = _os.fspath(path) == _os.fspath(entry_path)
            if not (is_entry and flags == _os.O_RDONLY):
                return original_open(path, flags, *args, **kwargs)
            refused_paths.append(path)
            raise OSError("open refused")

        recorded_utime = []

        def _record_utime(*args, **kwargs):
            recorded_utime.append((args, kwargs))

        monkeypatch.setattr(_os, "open", _failing_open)
        monkeypatch.setattr(_os, "utime", _record_utime)

        assert cache[b"k"] == b"v"
        assert refused_paths, "the atime bump should have attempted os.open on the entry"
        assert not recorded_utime, "os.utime must not run after os.open fails"
2128+
2129+
2130+
def test_filestream_touch_atime_swallows_fstat_failure(tmp_path, monkeypatch):
    """An ``os.fstat`` failure after the fd was opened is swallowed by the
    best-effort atime bump.

    The read still returns the cached bytes, the fd is closed, and
    ``os.utime`` is never reached.
    """
    import os as _os

    from cuda.core.utils import FileStreamProgramCache, _program_cache

    monkeypatch.setattr(_program_cache._file_stream, "_UTIME_SUPPORTS_FD", True)
    with FileStreamProgramCache(tmp_path / "fc") as cache:
        cache[b"k"] = b"v"
        entry_path = cache._path_for_key(b"k")

        # Record the fd the atime bump opens so we can prove it gets closed even
        # though fstat fails -- a leaked fd would block deletes on Windows.
        original_open = _os.open
        entry_fds = []

        def _recording_open(path, flags, *args, **kwargs):
            fd = original_open(path, flags, *args, **kwargs)
            is_entry = _os.fspath(path) == _os.fspath(entry_path)
            if is_entry and flags == _os.O_RDONLY:
                entry_fds.append(fd)
            return fd

        original_close = _os.close
        closed = []

        def _recording_close(fd):
            closed.append(fd)
            return original_close(fd)

        # os.fstat runs only in the atime bump here; the wrapper forces and
        # confirms the swallowed failure.
        fstat_attempts = []

        def _failing_fstat(fd):
            fstat_attempts.append(fd)
            raise OSError("fstat refused")

        recorded_utime = []

        def _record_utime(*args, **kwargs):
            recorded_utime.append((args, kwargs))

        monkeypatch.setattr(_os, "open", _recording_open)
        monkeypatch.setattr(_os, "close", _recording_close)
        monkeypatch.setattr(_os, "fstat", _failing_fstat)
        monkeypatch.setattr(_os, "utime", _record_utime)

        assert cache[b"k"] == b"v"
        assert fstat_attempts, "the atime bump should have attempted os.fstat"
        assert entry_fds, "the atime bump should have opened the entry fd"
        assert entry_fds[0] in closed, "the opened fd must be closed even when fstat fails"
        assert not recorded_utime, "os.utime must not run after os.fstat fails"
2179+
2180+
20032181
def test_make_program_cache_key_changes_with_key_schema_version(monkeypatch):
20042182
"""Bumping ``_KEY_SCHEMA_VERSION`` produces a different cache key for
20052183
the same logical inputs. That's what makes a schema bump invalidate

0 commit comments

Comments
 (0)