diff --git a/.github/ISSUE_TEMPLATE/1-bug_report.yml b/.github/ISSUE_TEMPLATE/1-bug_report.yml index 9941f8dc98b..9db233d7c52 100644 --- a/.github/ISSUE_TEMPLATE/1-bug_report.yml +++ b/.github/ISSUE_TEMPLATE/1-bug_report.yml @@ -39,7 +39,7 @@ body: - CUB - libcu++ - CUDA Experimental (cudax) - - cuda.coop (Python) + - cuda.coop.numba_cuda (Python) - cuda.compute (Python) - General CCCL - Infrastructure diff --git a/.github/ISSUE_TEMPLATE/2-feature_request.yml b/.github/ISSUE_TEMPLATE/2-feature_request.yml index 16b5a1fa74c..1e1544723ae 100644 --- a/.github/ISSUE_TEMPLATE/2-feature_request.yml +++ b/.github/ISSUE_TEMPLATE/2-feature_request.yml @@ -23,7 +23,7 @@ body: - CUB - libcu++ - CUDA Experimental (cudax) - - cuda.coop (Python) + - cuda.coop.numba_cuda (Python) - cuda.compute (Python) - General CCCL - Infrastructure diff --git a/.github/release.yml b/.github/release.yml index ccac75537f3..d99f42bcce4 100644 --- a/.github/release.yml +++ b/.github/release.yml @@ -7,7 +7,7 @@ changelog: - title: 📚 Libcudacxx labels: - "libcu++" - - title: 🤝 cuda.coop + - title: 🤝 cuda.coop.numba_cuda labels: - "cuda.coop" - title: ⚡ cuda.compute diff --git a/AGENTS.md b/AGENTS.md index e17e7921f5c..5d851d71427 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -206,7 +206,7 @@ Supported versions: `3.10`, `3.11`, `3.12`, `3.13` ### Modules * **cuda.compute** — Device-level algorithms, iterators, custom GPU types -* **cuda.coop** — Block/warp-level primitives +* **cuda.coop.numba_cuda** — Block/warp-level primitives for Numba CUDA * **cuda.cccl.headers** — Programmatic access to headers ### Installation @@ -245,7 +245,7 @@ Requirements: import cuda.compute result = cuda.compute.reduce_into(input_array, output_scalar, init_val, binary_op) -from cuda import coop +import cuda.coop.numba_cuda as coop @cuda.jit def kernel(data): coop.block.reduce(data, binary_op) diff --git a/ci/bench.template.yaml b/ci/bench.template.yaml index 637fddb7c02..c9553ccd887 100644 --- a/ci/bench.template.yaml +++ 
b/ci/bench.template.yaml @@ -38,7 +38,7 @@ benchmarks: # Examples: # - 'compute/reduce/sum\.py' # - 'compute/transform/.*\.py' - # - 'coop/bench_warp_reduce\.py' + # - 'coop/numba_cuda/bench_warp_reduce\.py' # Select GPUs. These are limited and shared, be intentional and conservative. gpus: diff --git a/ci/bench.yaml b/ci/bench.yaml index 637fddb7c02..c9553ccd887 100644 --- a/ci/bench.yaml +++ b/ci/bench.yaml @@ -38,7 +38,7 @@ benchmarks: # Examples: # - 'compute/reduce/sum\.py' # - 'compute/transform/.*\.py' - # - 'coop/bench_warp_reduce\.py' + # - 'coop/numba_cuda/bench_warp_reduce\.py' # Select GPUs. These are limited and shared, be intentional and conservative. gpus: diff --git a/ci/bench/README.md b/ci/bench/README.md index 990d1508134..deac39e58aa 100644 --- a/ci/bench/README.md +++ b/ci/bench/README.md @@ -71,7 +71,7 @@ For Python benchmarks, `compare_paths.sh`: Python filters are regex patterns matched against relative paths under `python/cuda_cccl/benchmarks/`, for example: - `compute/reduce/sum\.py` — single benchmark - `compute/transform/.*\.py` — all transform benchmarks -- `coop/.*\.py` — all coop benchmarks +- `coop/numba_cuda/.*\.py` — all coop benchmarks ## Artifacts diff --git a/ci/matrix.yaml b/ci/matrix.yaml index afe1ee4182a..570d2be1599 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -507,7 +507,7 @@ jobs: # Python: build_py_wheel: { name: "Build cuda.cccl", gpu: false, invoke: { prefix: 'build_cuda_cccl'} } test_py_headers: { name: "Test cuda.cccl.headers", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_cccl_headers'} } - test_py_coop: { name: "Test cuda.coop", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_coop'} } + test_py_coop: { name: "Test cuda.coop.numba_cuda", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_coop'} } test_py_par: { name: "Test cuda.compute", gpu: true, needs: 
'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_compute'} } test_py_examples: { name: "Test cuda.cccl.examples", gpu: true, needs: 'build_py_wheel', force_producer_ctk: "pybuild", invoke: { prefix: 'test_cuda_cccl_examples'} } diff --git a/ci/test_cuda_coop_python.sh b/ci/test_cuda_coop_python.sh index bf75a2f8d02..681f04b2c44 100755 --- a/ci/test_cuda_coop_python.sh +++ b/ci/test_cuda_coop_python.sh @@ -27,4 +27,4 @@ python -m pip install "${CUDA_CCCL_WHEEL_PATH}[test-cu${cuda_major_version}]" # Run tests for coop module cd "/home/coder/cccl/python/cuda_cccl/tests/" -python -m pytest -n auto -v coop/ +python -m pytest -n auto -v coop/numba_cuda/ diff --git a/ci/windows/build_cuda_cccl_python.ps1 b/ci/windows/build_cuda_cccl_python.ps1 index c7df806ac5a..beb5f4ce7ba 100644 --- a/ci/windows/build_cuda_cccl_python.ps1 +++ b/ci/windows/build_cuda_cccl_python.ps1 @@ -1,6 +1,6 @@ <# .SYNOPSIS - Build Python cuda-cccl wheels for the cuda.compute and cuda.coop packages + Build Python cuda-cccl wheels for the cuda.compute and cuda.coop.numba_cuda packages on Windows. .DESCRIPTION diff --git a/ci/windows/test_cuda_coop_python.ps1 b/ci/windows/test_cuda_coop_python.ps1 index 7eaa9c470b3..2d451c71d48 100644 --- a/ci/windows/test_cuda_coop_python.ps1 +++ b/ci/windows/test_cuda_coop_python.ps1 @@ -20,6 +20,6 @@ ${wheelPath} = Get-CudaCcclWheel Push-Location (Join-Path (Get-RepoRoot) "python/cuda_cccl/tests") try { - & $python -m pytest -n auto -v coop/ + & $python -m pytest -n auto -v coop/numba_cuda/ } finally { Pop-Location } diff --git a/cub/cub/block/block_radix_sort.cuh b/cub/cub/block/block_radix_sort.cuh index 23a5e25f6fc..21049484112 100644 --- a/cub/cub/block/block_radix_sort.cuh +++ b/cub/cub/block/block_radix_sort.cuh @@ -150,12 +150,12 @@ CUB_NAMESPACE_BEGIN //! //! .. code-block:: python //! -//! from cuda import coop +//! import cuda.coop.numba_cuda as coop //! from pynvjitlink import patch //! patch.patch_numba_linker(lto=True) //! 
//! # Specialize radix sort for a 1D block of 128 threads owning 4 integer items each -//! block_radix_sort = coop.block.radix_sort_keys(numba.int32, 128, 4) +//! block_radix_sort = coop.block.make_radix_sort_keys(numba.int32, 128, 4) //! temp_storage_bytes = block_radix_sort.temp_storage_bytes //! //! @cuda.jit(link=block_radix_sort.files) diff --git a/docs/python/coop.rst b/docs/python/coop.rst index c22393ed43d..ca22ee6a996 100644 --- a/docs/python/coop.rst +++ b/docs/python/coop.rst @@ -1,27 +1,27 @@ .. _cccl-python-coop: -``cuda.coop``: Cooperative Algorithms -===================================== +``cuda.coop.numba_cuda``: Cooperative Algorithms +================================================ -The ``cuda.coop`` library provides cooperative algorithms that operate +The ``cuda.coop.numba_cuda`` library provides cooperative algorithms that operate at the level of blocks and warps. It is designed to be used within `Numba CUDA kernels `_. -Here's an example showing how to use the ``cuda.coop`` library to +Here's an example showing how to use the ``cuda.coop.numba_cuda`` library to perform block-level reduction within a Numba CUDA kernel. -.. literalinclude:: ../../python/cuda_cccl/tests/coop/examples/block/reduce.py +.. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/examples/block/reduce.py :language: python :pyobject: custom_reduce_example - :caption: Block-level reduction example. `View complete source on GitHub `__ + :caption: Block-level reduction example. `View complete source on GitHub `__ Example Collections ------------------- For complete runnable examples and more advanced usage patterns, see our -full collection of `examples `_. +full collection of `examples `_. API Reference ------------- -- :ref:`cuda_coop-module` +- :ref:`cuda_coop_numba_cuda-module` diff --git a/docs/python/coop_api.rst b/docs/python/coop_api.rst index 5f06449e164..d5cffa18079 100644 --- a/docs/python/coop_api.rst +++ b/docs/python/coop_api.rst @@ -1,19 +1,19 @@ -.. 
_cuda_coop-module: +.. _cuda_coop_numba_cuda-module: -``cuda.coop`` API Reference -=========================== +``cuda.coop.numba_cuda`` API Reference +====================================== .. warning:: - ``cuda.coop`` is in public beta. + ``cuda.coop.numba_cuda`` is in public beta. The API is subject to change without notice. -.. automodule:: cuda.coop.warp +.. automodule:: cuda.coop.numba_cuda.warp :members: :undoc-members: :imported-members: -.. automodule:: cuda.coop.block +.. automodule:: cuda.coop.numba_cuda.block :members: :undoc-members: :imported-members: diff --git a/docs/python/index.rst b/docs/python/index.rst index 8c71c5ac3a8..39cbe1e1a1f 100644 --- a/docs/python/index.rst +++ b/docs/python/index.rst @@ -11,7 +11,7 @@ abstractions for CUDA Python developers. * :doc:`cuda.compute ` — Composable device-level primitives for building custom parallel algorithms, without writing CUDA kernels directly. -* :doc:`cuda.coop ` — Cooperative block- and warp-level algorithms for +* :doc:`cuda.coop.numba_cuda ` — Cooperative block- and warp-level algorithms for writing highly efficient CUDA kernels with `Numba CUDA `_. These libraries expose the generic, highly-optimized algorithms from the diff --git a/docs/python/resources.rst b/docs/python/resources.rst index 9243abf9cd2..66a80dadff8 100644 --- a/docs/python/resources.rst +++ b/docs/python/resources.rst @@ -7,7 +7,7 @@ Examples For recipes and patterns, see our examples: * ``cuda.compute`` `examples `_ -* ``cuda.coop`` `examples `_ +* ``cuda.coop.numba_cuda`` `examples `_ CUB and Thrust Documentation ---------------------------- diff --git a/python/cuda_cccl/README.md b/python/cuda_cccl/README.md index 527c1dafc72..9bf08fb8b63 100644 --- a/python/cuda_cccl/README.md +++ b/python/cuda_cccl/README.md @@ -6,7 +6,7 @@ provides a Pythonic interface to the It provides the following modules: - **`cuda.compute`** - Device-level parallel algorithms (reduce, scan, sort, etc.) 
and iterators -- **`cuda.coop`** - Block and warp-level cooperative primitives for custom CUDA kernels +- **`cuda.coop.numba_cuda`** - Block and warp-level cooperative primitives for custom CUDA kernels ## Installation @@ -46,4 +46,4 @@ For complete documentation, examples, and API reference, visit: - **Full Documentation**: [nvidia.github.io/cccl/python](https://nvidia.github.io/cccl/python) - **Repository**: [github.com/NVIDIA/cccl](https://github.com/NVIDIA/cccl) -- **Examples**: [github.com/NVIDIA/cccl/tree/main/python/cuda_cccl/tests/compute/examples](https://github.com/NVIDIA/cccl/tree/main/python/cuda_cccl/tests/compute/examples) and [github.com/NVIDIA/cccl/tree/main/python/cuda_cccl/tests/coop/examples](https://github.com/NVIDIA/cccl/tree/main/python/cuda_cccl/tests/coop/examples) +- **Examples**: [github.com/NVIDIA/cccl/tree/main/python/cuda_cccl/tests/compute/examples](https://github.com/NVIDIA/cccl/tree/main/python/cuda_cccl/tests/compute/examples) and [github.com/NVIDIA/cccl/tree/main/python/cuda_cccl/tests/coop/numba_cuda/examples](https://github.com/NVIDIA/cccl/tree/main/python/cuda_cccl/tests/coop/numba_cuda/examples) diff --git a/python/cuda_cccl/benchmarks/coop/README.md b/python/cuda_cccl/benchmarks/coop/numba_cuda/README.md similarity index 100% rename from python/cuda_cccl/benchmarks/coop/README.md rename to python/cuda_cccl/benchmarks/coop/numba_cuda/README.md diff --git a/python/cuda_cccl/benchmarks/coop/bench_warp_reduce.py b/python/cuda_cccl/benchmarks/coop/numba_cuda/bench_warp_reduce.py similarity index 100% rename from python/cuda_cccl/benchmarks/coop/bench_warp_reduce.py rename to python/cuda_cccl/benchmarks/coop/numba_cuda/bench_warp_reduce.py diff --git a/python/cuda_cccl/benchmarks/coop/device_side_benchmark.py b/python/cuda_cccl/benchmarks/coop/numba_cuda/device_side_benchmark.py similarity index 99% rename from python/cuda_cccl/benchmarks/coop/device_side_benchmark.py rename to 
python/cuda_cccl/benchmarks/coop/numba_cuda/device_side_benchmark.py index 83c8b71c944..970cc42c05f 100644 --- a/python/cuda_cccl/benchmarks/coop/device_side_benchmark.py +++ b/python/cuda_cccl/benchmarks/coop/numba_cuda/device_side_benchmark.py @@ -9,7 +9,7 @@ from numba.core.extending import intrinsic import cuda.bindings.driver as driver -from cuda import coop +import cuda.coop.numba_cuda as coop from cuda.core import Device diff --git a/python/cuda_cccl/cuda/cccl/cooperative/__init__.py b/python/cuda_cccl/cuda/cccl/cooperative/__init__.py deleted file mode 100644 index d4514af1667..00000000000 --- a/python/cuda_cccl/cuda/cccl/cooperative/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# -# SPDX-License -Identifier: Apache-2.0 WITH LLVM-exception - -from . import experimental - -__all__ = [ - "experimental", -] diff --git a/python/cuda_cccl/cuda/cccl/cooperative/experimental/__init__.py b/python/cuda_cccl/cuda/cccl/cooperative/experimental/__init__.py deleted file mode 100644 index 01d020c65cf..00000000000 --- a/python/cuda_cccl/cuda/cccl/cooperative/experimental/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# alias for backwards compatibility - -from warnings import warn - -from cuda.coop import * # noqa: F403 - -warn( - "The module cuda.cccl.cooperative.experimental is deprecated. 
Use cuda.coop instead.", - FutureWarning, -) diff --git a/python/cuda_cccl/cuda/coop/__init__.py b/python/cuda_cccl/cuda/coop/__init__.py index d83b19e6044..ff5205f9087 100644 --- a/python/cuda_cccl/cuda/coop/__init__.py +++ b/python/cuda_cccl/cuda/coop/__init__.py @@ -1,8 +1,7 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -from . import block, warp -from ._types import StatefulFunction +"""Cooperative algorithm backends for CUDA Python DSLs.""" -__all__ = ["block", "warp", "StatefulFunction"] +__all__: list[str] = [] diff --git a/python/cuda_cccl/cuda/coop/numba_cuda/__init__.py b/python/cuda_cccl/cuda/coop/numba_cuda/__init__.py new file mode 100644 index 00000000000..fc9d3be7591 --- /dev/null +++ b/python/cuda_cccl/cuda/coop/numba_cuda/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from . 
import block, warp +from ._types import StatefulFunction + +__all__ = ["block", "warp", "StatefulFunction"] diff --git a/python/cuda_cccl/cuda/coop/_caching.py b/python/cuda_cccl/cuda/coop/numba_cuda/_caching.py similarity index 100% rename from python/cuda_cccl/cuda/coop/_caching.py rename to python/cuda_cccl/cuda/coop/numba_cuda/_caching.py diff --git a/python/cuda_cccl/cuda/coop/_common.py b/python/cuda_cccl/cuda/coop/numba_cuda/_common.py similarity index 100% rename from python/cuda_cccl/cuda/coop/_common.py rename to python/cuda_cccl/cuda/coop/numba_cuda/_common.py diff --git a/python/cuda_cccl/cuda/coop/_nvrtc.py b/python/cuda_cccl/cuda/coop/numba_cuda/_nvrtc.py similarity index 100% rename from python/cuda_cccl/cuda/coop/_nvrtc.py rename to python/cuda_cccl/cuda/coop/numba_cuda/_nvrtc.py diff --git a/python/cuda_cccl/cuda/coop/_scan_op.py b/python/cuda_cccl/cuda/coop/numba_cuda/_scan_op.py similarity index 99% rename from python/cuda_cccl/cuda/coop/_scan_op.py rename to python/cuda_cccl/cuda/coop/numba_cuda/_scan_op.py index cd3dc8d0086..1a48034ea82 100644 --- a/python/cuda_cccl/cuda/coop/_scan_op.py +++ b/python/cuda_cccl/cuda/coop/numba_cuda/_scan_op.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception """ -cuda.coop._scan_op -================== +cuda.coop.numba_cuda._scan_op +============================= This module implements the ``ScanOp`` class and related functions. 
diff --git a/python/cuda_cccl/cuda/coop/_types.py b/python/cuda_cccl/cuda/coop/numba_cuda/_types.py similarity index 100% rename from python/cuda_cccl/cuda/coop/_types.py rename to python/cuda_cccl/cuda/coop/numba_cuda/_types.py diff --git a/python/cuda_cccl/cuda/coop/_typing.py b/python/cuda_cccl/cuda/coop/numba_cuda/_typing.py similarity index 100% rename from python/cuda_cccl/cuda/coop/_typing.py rename to python/cuda_cccl/cuda/coop/numba_cuda/_typing.py diff --git a/python/cuda_cccl/cuda/coop/block/__init__.py b/python/cuda_cccl/cuda/coop/numba_cuda/block/__init__.py similarity index 100% rename from python/cuda_cccl/cuda/coop/block/__init__.py rename to python/cuda_cccl/cuda/coop/numba_cuda/block/__init__.py diff --git a/python/cuda_cccl/cuda/coop/block/_block_exchange.py b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_exchange.py similarity index 97% rename from python/cuda_cccl/cuda/coop/block/_block_exchange.py rename to python/cuda_cccl/cuda/coop/numba_cuda/block/_block_exchange.py index b7704808b0a..4e80407b6e6 100644 --- a/python/cuda_cccl/cuda/coop/block/_block_exchange.py +++ b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_exchange.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception """ -cuda.coop.block_exchange +cuda.coop.numba_cuda.block_exchange ==================================== This module provides a set of :ref:`collective ` methods @@ -126,9 +126,9 @@ def kernel(thread_data): :param block_exchange_type: Exchange mode to perform. Currently, only :py:attr:`StripedToBlocked` is supported. :param dtype: Data type of input and output values. - :type dtype: :py:class:`cuda.coop._typing.DtypeType` + :type dtype: :py:class:`cuda.coop.numba_cuda._typing.DtypeType` :param threads_per_block: Number of threads in the block. 
- :type threads_per_block: :py:class:`cuda.coop._typing.DimType` + :type threads_per_block: :py:class:`cuda.coop.numba_cuda._typing.DimType` :param items_per_thread: Number of items owned by each thread. :type items_per_thread: int :param warp_time_slicing: Whether to use warp time-slicing. If true, @@ -143,7 +143,7 @@ def kernel(thread_data): :raises ValueError: If ``items_per_thread`` is less than 1. - :returns: An :py:class:`cuda.coop._types.Invocable` object + :returns: An :py:class:`cuda.coop.numba_cuda._types.Invocable` object representing the specialized kernel callable from a Numba JIT'd CUDA kernel. diff --git a/python/cuda_cccl/cuda/coop/block/_block_load_store.py b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_load_store.py similarity index 95% rename from python/cuda_cccl/cuda/coop/block/_block_load_store.py rename to python/cuda_cccl/cuda/coop/numba_cuda/block/_block_load_store.py index a8ec6f1d4d0..705a061c8b5 100644 --- a/python/cuda_cccl/cuda/coop/block/_block_load_store.py +++ b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_load_store.py @@ -80,7 +80,7 @@ def make_load(dtype, threads_per_block, items_per_thread=1, algorithm="direct"): 128 integer items by 32 threads, with each thread handling 4 integers. - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_load_store_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_load_store_api.py :language: python :dedent: :start-after: example-begin imports @@ -89,7 +89,7 @@ def make_load(dtype, threads_per_block, items_per_thread=1, algorithm="direct"): The following snippet shows how to invoke the returned ``block_load`` and ``block_store`` primitives: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_load_store_api.py + .. 
literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_load_store_api.py :language: python :dedent: :start-after: example-begin load_store @@ -182,7 +182,7 @@ def make_store(dtype, threads_per_block, items_per_thread=1, algorithm="direct") 128 integer items by 32 threads, with each thread handling 4 integers. - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_load_store_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_load_store_api.py :language: python :dedent: :start-after: example-begin imports @@ -191,7 +191,7 @@ def make_store(dtype, threads_per_block, items_per_thread=1, algorithm="direct") The following snippet shows how to invoke the returned ``block_load`` and ``block_store`` primitives: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_load_store_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_load_store_api.py :language: python :dedent: :start-after: example-begin load_store diff --git a/python/cuda_cccl/cuda/coop/block/_block_merge_sort.py b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_merge_sort.py similarity index 95% rename from python/cuda_cccl/cuda/coop/block/_block_merge_sort.py rename to python/cuda_cccl/cuda/coop/numba_cuda/block/_block_merge_sort.py index c1417d5b993..400f4c6264b 100644 --- a/python/cuda_cccl/cuda/coop/block/_block_merge_sort.py +++ b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_merge_sort.py @@ -44,7 +44,7 @@ def make_merge_sort_keys( :ref:`blocked arrangement ` across 128 threads where each thread owns 4 consecutive keys. - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_merge_sort_api.py + .. 
literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_merge_sort_api.py :language: python :dedent: :start-after: example-begin imports @@ -53,7 +53,7 @@ def make_merge_sort_keys( The following snippet shows how to invoke the returned ``block_merge_sort`` primitive: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_merge_sort_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_merge_sort_api.py :language: python :dedent: :start-after: example-begin merge-sort diff --git a/python/cuda_cccl/cuda/coop/block/_block_radix_sort.py b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_radix_sort.py similarity index 94% rename from python/cuda_cccl/cuda/coop/block/_block_radix_sort.py rename to python/cuda_cccl/cuda/coop/numba_cuda/block/_block_radix_sort.py index 4764f35fbb3..a63d1627263 100644 --- a/python/cuda_cccl/cuda/coop/block/_block_radix_sort.py +++ b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_radix_sort.py @@ -143,7 +143,7 @@ def make_radix_sort_keys(dtype, threads_per_block, items_per_thread): :ref:`blocked arrangement ` across 128 threads where each thread owns 4 consecutive keys. - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_radix_sort_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_radix_sort_api.py :language: python :dedent: :start-after: example-begin imports @@ -152,7 +152,7 @@ def make_radix_sort_keys(dtype, threads_per_block, items_per_thread): The following snippet shows how to invoke the returned ``block_radix_sort`` primitive: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_radix_sort_api.py + .. 
literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_radix_sort_api.py :language: python :dedent: :start-after: example-begin radix-sort @@ -189,7 +189,7 @@ def make_radix_sort_keys_descending(dtype, threads_per_block, items_per_thread): :ref:`blocked arrangement ` across 128 threads where each thread owns 4 consecutive keys. - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_radix_sort_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_radix_sort_api.py :language: python :dedent: :start-after: example-begin imports @@ -198,7 +198,7 @@ def make_radix_sort_keys_descending(dtype, threads_per_block, items_per_thread): The following snippet shows how to invoke the returned ``block_radix_sort`` primitive: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_radix_sort_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_radix_sort_api.py :language: python :dedent: :start-after: example-begin radix-sort-descending diff --git a/python/cuda_cccl/cuda/coop/block/_block_reduce.py b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_reduce.py similarity index 96% rename from python/cuda_cccl/cuda/coop/block/_block_reduce.py rename to python/cuda_cccl/cuda/coop/numba_cuda/block/_block_reduce.py index 5271cbe9e67..80769cb004f 100644 --- a/python/cuda_cccl/cuda/coop/block/_block_reduce.py +++ b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_reduce.py @@ -213,7 +213,7 @@ def make_reduce( The code snippet below illustrates a max reduction of 128 integer items that are partitioned across 128 threads. - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_reduce_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_reduce_api.py :language: python :dedent: :start-after: example-begin imports @@ -222,7 +222,7 @@ def make_reduce( The following snippet shows how to invoke the returned ``block_reduce`` primitive: - .. 
literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_reduce_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_reduce_api.py :language: python :dedent: :start-after: example-begin reduce @@ -282,7 +282,7 @@ def make_sum( The code snippet below illustrates a sum of 128 integer items partitioned across 128 threads. - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_reduce_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_reduce_api.py :language: python :dedent: :start-after: example-begin imports @@ -291,7 +291,7 @@ def make_sum( The following snippet shows how to invoke the returned ``block_sum`` primitive: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_reduce_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_reduce_api.py :language: python :dedent: :start-after: example-begin sum diff --git a/python/cuda_cccl/cuda/coop/block/_block_scan.py b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_scan.py similarity index 99% rename from python/cuda_cccl/cuda/coop/block/_block_scan.py rename to python/cuda_cccl/cuda/coop/numba_cuda/block/_block_scan.py index 38c0359fa03..01c75b01c28 100644 --- a/python/cuda_cccl/cuda/coop/block/_block_scan.py +++ b/python/cuda_cccl/cuda/coop/numba_cuda/block/_block_scan.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception """ -cuda.coop.block_scan -=========================== +cuda.coop.numba_cuda.block_scan +=============================== This module provides a set of :ref:`collective ` @@ -674,7 +674,7 @@ def make_exclusive_sum( :ref:`blocked arrangement ` across 128 threads where each thread owns 4 consecutive items. - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_scan_api.py + .. 
literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_scan_api.py :language: python :dedent: :start-after: example-begin imports @@ -683,7 +683,7 @@ def make_exclusive_sum( The following snippet shows how to invoke the returned ``block_exclusive_sum`` primitive: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_block_scan_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_block_scan_api.py :language: python :dedent: :start-after: example-begin exclusive-sum diff --git a/python/cuda_cccl/cuda/coop/warp/__init__.py b/python/cuda_cccl/cuda/coop/numba_cuda/warp/__init__.py similarity index 100% rename from python/cuda_cccl/cuda/coop/warp/__init__.py rename to python/cuda_cccl/cuda/coop/numba_cuda/warp/__init__.py diff --git a/python/cuda_cccl/cuda/coop/warp/_warp_merge_sort.py b/python/cuda_cccl/cuda/coop/numba_cuda/warp/_warp_merge_sort.py similarity index 96% rename from python/cuda_cccl/cuda/coop/warp/_warp_merge_sort.py rename to python/cuda_cccl/cuda/coop/numba_cuda/warp/_warp_merge_sort.py index 54d43b566e3..ba914f097d2 100644 --- a/python/cuda_cccl/cuda/coop/warp/_warp_merge_sort.py +++ b/python/cuda_cccl/cuda/coop/numba_cuda/warp/_warp_merge_sort.py @@ -34,7 +34,7 @@ def make_merge_sort_keys( The following snippet shows how to invoke the returned ``warp_merge_sort`` primitive: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_warp_merge_sort_api.py + .. 
literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_warp_merge_sort_api.py :language: python :dedent: :start-after: example-begin merge-sort diff --git a/python/cuda_cccl/cuda/coop/warp/_warp_reduce.py b/python/cuda_cccl/cuda/coop/numba_cuda/warp/_warp_reduce.py similarity index 92% rename from python/cuda_cccl/cuda/coop/warp/_warp_reduce.py rename to python/cuda_cccl/cuda/coop/numba_cuda/warp/_warp_reduce.py index 117a25919a3..63786dcacf7 100644 --- a/python/cuda_cccl/cuda/coop/warp/_warp_reduce.py +++ b/python/cuda_cccl/cuda/coop/numba_cuda/warp/_warp_reduce.py @@ -31,7 +31,7 @@ def make_reduce(dtype, binary_op, threads_in_warp=32, methods=None): The code snippet below illustrates a max reduction of 32 integer items that are partitioned across a warp of threads. - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_warp_reduce_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_warp_reduce_api.py :language: python :dedent: :start-after: example-begin imports @@ -40,7 +40,7 @@ def make_reduce(dtype, binary_op, threads_in_warp=32, methods=None): The following snippet shows how to invoke the returned ``warp_reduce`` primitive: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_warp_reduce_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_warp_reduce_api.py :language: python :dedent: :start-after: example-begin reduce @@ -108,7 +108,7 @@ def make_sum(dtype, threads_in_warp=32): The code snippet below illustrates a reduction of 32 integer items that are partitioned across a warp of threads. - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_warp_reduce_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_warp_reduce_api.py :language: python :dedent: :start-after: example-begin imports @@ -117,7 +117,7 @@ def make_sum(dtype, threads_in_warp=32): The following snippet shows how to invoke the returned ``warp_sum`` primitive: - .. 
literalinclude:: ../../python/cuda_cccl/tests/coop/test_warp_reduce_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_warp_reduce_api.py :language: python :dedent: :start-after: example-begin sum diff --git a/python/cuda_cccl/cuda/coop/warp/_warp_scan.py b/python/cuda_cccl/cuda/coop/numba_cuda/warp/_warp_scan.py similarity index 92% rename from python/cuda_cccl/cuda/coop/warp/_warp_scan.py rename to python/cuda_cccl/cuda/coop/numba_cuda/warp/_warp_scan.py index 0c5cef53730..1d56f8d0623 100644 --- a/python/cuda_cccl/cuda/coop/warp/_warp_scan.py +++ b/python/cuda_cccl/cuda/coop/numba_cuda/warp/_warp_scan.py @@ -26,7 +26,7 @@ def make_exclusive_sum(dtype, threads_in_warp=32): The code snippet below illustrates an exclusive prefix sum of 32 integer items: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_warp_scan_api.py + .. literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_warp_scan_api.py :language: python :dedent: :start-after: example-begin imports @@ -35,7 +35,7 @@ def make_exclusive_sum(dtype, threads_in_warp=32): The following snippet shows how to invoke the returned ``warp_exclusive_sum`` primitive: - .. literalinclude:: ../../python/cuda_cccl/tests/coop/test_warp_scan_api.py + .. 
literalinclude:: ../../python/cuda_cccl/tests/coop/numba_cuda/test_warp_scan_api.py :language: python :dedent: :start-after: example-begin exclusive-sum diff --git a/python/cuda_cccl/pyproject.toml b/python/cuda_cccl/pyproject.toml index f5e65c872a9..9766fd779bf 100644 --- a/python/cuda_cccl/pyproject.toml +++ b/python/cuda_cccl/pyproject.toml @@ -145,6 +145,7 @@ known-first-party = [ "cuda.cccl.headers", "cuda.compute", "cuda.coop", + "cuda.coop.numba_cuda", ] [tool.pytest.ini_options] diff --git a/python/cuda_cccl/tests/coop/examples/block/__init__.py b/python/cuda_cccl/tests/coop/numba_cuda/examples/block/__init__.py similarity index 100% rename from python/cuda_cccl/tests/coop/examples/block/__init__.py rename to python/cuda_cccl/tests/coop/numba_cuda/examples/block/__init__.py diff --git a/python/cuda_cccl/tests/coop/examples/block/reduce.py b/python/cuda_cccl/tests/coop/numba_cuda/examples/block/reduce.py similarity index 99% rename from python/cuda_cccl/tests/coop/examples/block/reduce.py rename to python/cuda_cccl/tests/coop/numba_cuda/examples/block/reduce.py index cf8920cb838..fdf82340a22 100644 --- a/python/cuda_cccl/tests/coop/examples/block/reduce.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/examples/block/reduce.py @@ -10,7 +10,7 @@ import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/examples/block/scan.py b/python/cuda_cccl/tests/coop/numba_cuda/examples/block/scan.py similarity index 99% rename from python/cuda_cccl/tests/coop/examples/block/scan.py rename to python/cuda_cccl/tests/coop/numba_cuda/examples/block/scan.py index d6c49fa07cd..c3679a38a7e 100644 --- a/python/cuda_cccl/tests/coop/examples/block/scan.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/examples/block/scan.py @@ -10,7 +10,7 @@ import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop 
numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/examples/warp/__init__.py b/python/cuda_cccl/tests/coop/numba_cuda/examples/warp/__init__.py similarity index 100% rename from python/cuda_cccl/tests/coop/examples/warp/__init__.py rename to python/cuda_cccl/tests/coop/numba_cuda/examples/warp/__init__.py diff --git a/python/cuda_cccl/tests/coop/examples/warp/reduce.py b/python/cuda_cccl/tests/coop/numba_cuda/examples/warp/reduce.py similarity index 99% rename from python/cuda_cccl/tests/coop/examples/warp/reduce.py rename to python/cuda_cccl/tests/coop/numba_cuda/examples/warp/reduce.py index a204854aeab..0bb12d82592 100644 --- a/python/cuda_cccl/tests/coop/examples/warp/reduce.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/examples/warp/reduce.py @@ -10,7 +10,7 @@ import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/helpers.py b/python/cuda_cccl/tests/coop/numba_cuda/helpers.py similarity index 100% rename from python/cuda_cccl/tests/coop/helpers.py rename to python/cuda_cccl/tests/coop/numba_cuda/helpers.py diff --git a/python/cuda_cccl/tests/coop/test_block_exchange.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_exchange.py similarity index 98% rename from python/cuda_cccl/tests/coop/test_block_exchange.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_exchange.py index bee38dce1b0..faf876202ee 100644 --- a/python/cuda_cccl/tests/coop/test_block_exchange.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_exchange.py @@ -5,7 +5,7 @@ """ test_block_exchange.py -This file contains unit tests for cuda.coop.block_exchange. +This file contains unit tests for cuda.coop.numba_cuda.block_exchange. 
""" from functools import partial, reduce @@ -23,7 +23,7 @@ ) from numba import cuda, types -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_block_load.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_load.py similarity index 98% rename from python/cuda_cccl/tests/coop/test_block_load.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_load.py index 97c22aee8f7..290672a56c3 100644 --- a/python/cuda_cccl/tests/coop/test_block_load.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_load.py @@ -10,7 +10,7 @@ from helpers import NUMBA_TYPES_TO_NP, random_int, row_major_tid from numba import cuda, types -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_block_load_store_api.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_load_store_api.py similarity index 97% rename from python/cuda_cccl/tests/coop/test_block_load_store_api.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_load_store_api.py index d341c4272b7..071a1ea576d 100644 --- a/python/cuda_cccl/tests/coop/test_block_load_store_api.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_load_store_api.py @@ -7,7 +7,7 @@ import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop # example-end imports diff --git a/python/cuda_cccl/tests/coop/test_block_merge_sort.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_merge_sort.py similarity index 99% rename from python/cuda_cccl/tests/coop/test_block_merge_sort.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_merge_sort.py index b3713ef6608..484a34de0d7 100644 --- a/python/cuda_cccl/tests/coop/test_block_merge_sort.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_merge_sort.py @@ -11,7 +11,7 @@ from helpers import NUMBA_TYPES_TO_NP, random_int, 
row_major_tid from numba import cuda, types -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_block_merge_sort_api.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_merge_sort_api.py similarity index 97% rename from python/cuda_cccl/tests/coop/test_block_merge_sort_api.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_merge_sort_api.py index 3c55457e2ec..f70b6dfa612 100644 --- a/python/cuda_cccl/tests/coop/test_block_merge_sort_api.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_merge_sort_api.py @@ -6,7 +6,7 @@ import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop # example-begin imports numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_block_radix_sort.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_radix_sort.py similarity index 99% rename from python/cuda_cccl/tests/coop/test_block_radix_sort.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_radix_sort.py index 5a0ca21b3bd..0ab96aa1e4b 100644 --- a/python/cuda_cccl/tests/coop/test_block_radix_sort.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_radix_sort.py @@ -10,7 +10,7 @@ from helpers import NUMBA_TYPES_TO_NP, random_int, row_major_tid from numba import cuda, types -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_block_radix_sort_api.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_radix_sort_api.py similarity index 98% rename from python/cuda_cccl/tests/coop/test_block_radix_sort_api.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_radix_sort_api.py index 2c7991bc75a..985468677ea 100644 --- a/python/cuda_cccl/tests/coop/test_block_radix_sort_api.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_radix_sort_api.py @@ -6,7 +6,7 @@ 
import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop # example-begin imports numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_block_reduce.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_reduce.py similarity index 99% rename from python/cuda_cccl/tests/coop/test_block_reduce.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_reduce.py index 8b1465bd860..5b846aa553d 100644 --- a/python/cuda_cccl/tests/coop/test_block_reduce.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_reduce.py @@ -17,7 +17,7 @@ ) from numba import cuda, types -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_block_reduce_api.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_reduce_api.py similarity index 97% rename from python/cuda_cccl/tests/coop/test_block_reduce_api.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_reduce_api.py index 1915202e6e9..4069632bd7d 100644 --- a/python/cuda_cccl/tests/coop/test_block_reduce_api.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_reduce_api.py @@ -7,7 +7,7 @@ import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_block_scan.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_scan.py similarity index 99% rename from python/cuda_cccl/tests/coop/test_block_scan.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_scan.py index ba072633c97..930f18dec08 100644 --- a/python/cuda_cccl/tests/coop/test_block_scan.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_scan.py @@ -5,7 +5,7 @@ """ test_block_scan.py -This file contains unit tests for cuda.coop.block_scan. It covers both +This file contains unit tests for cuda.coop.numba_cuda.block_scan. 
It covers both valid and invalid usage scenarios, tests sum-based scans, user-defined operators and types, prefix callback operators, and known operators such as min, max, and bitwise XOR. @@ -35,8 +35,8 @@ typeof_impl, ) -from cuda import coop -from cuda.coop.block._block_scan import ( +import cuda.coop.numba_cuda as coop +from cuda.coop.numba_cuda.block._block_scan import ( ScanOp, ) diff --git a/python/cuda_cccl/tests/coop/test_block_scan_api.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_scan_api.py similarity index 98% rename from python/cuda_cccl/tests/coop/test_block_scan_api.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_scan_api.py index 165a3978eac..ef15c81f087 100644 --- a/python/cuda_cccl/tests/coop/test_block_scan_api.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_scan_api.py @@ -6,7 +6,7 @@ import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop # example-begin imports numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_block_store.py b/python/cuda_cccl/tests/coop/numba_cuda/test_block_store.py similarity index 98% rename from python/cuda_cccl/tests/coop/test_block_store.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_block_store.py index d08c6b0633c..48a047abd7c 100644 --- a/python/cuda_cccl/tests/coop/test_block_store.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_block_store.py @@ -10,7 +10,7 @@ from helpers import NUMBA_TYPES_TO_NP, random_int, row_major_tid from numba import cuda, types -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_common.py b/python/cuda_cccl/tests/coop/numba_cuda/test_common.py similarity index 99% rename from python/cuda_cccl/tests/coop/test_common.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_common.py index f335f9e1782..9a9a2c3f54e 100644 --- 
a/python/cuda_cccl/tests/coop/test_common.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_common.py @@ -6,7 +6,7 @@ import numpy as np import pytest -from cuda.coop._common import ( +from cuda.coop.numba_cuda._common import ( CudaSharedMemConfig, dim3, normalize_dim_param, diff --git a/python/cuda_cccl/tests/coop/test_scan_op.py b/python/cuda_cccl/tests/coop/numba_cuda/test_scan_op.py similarity index 97% rename from python/cuda_cccl/tests/coop/test_scan_op.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_scan_op.py index c081822e758..0881f8a812d 100644 --- a/python/cuda_cccl/tests/coop/test_scan_op.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_scan_op.py @@ -5,7 +5,7 @@ """ test_scan_op.py -This file contains unit tests for the cuda.coop._scan_op +This file contains unit tests for the cuda.coop.numba_cuda._scan_op module's ``ScanOp`` class and related functionality. """ @@ -14,7 +14,7 @@ import numpy as np import pytest -from cuda.coop._scan_op import ( +from cuda.coop.numba_cuda._scan_op import ( CUDA_MAXIMUM, CUDA_MINIMUM, CUDA_STD_BIT_AND, diff --git a/python/cuda_cccl/tests/coop/test_warp_merge_sort.py b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_merge_sort.py similarity index 98% rename from python/cuda_cccl/tests/coop/test_warp_merge_sort.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_warp_merge_sort.py index 14acddc37b5..79a1fc62480 100644 --- a/python/cuda_cccl/tests/coop/test_warp_merge_sort.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_merge_sort.py @@ -7,7 +7,7 @@ from helpers import NUMBA_TYPES_TO_NP, random_int from numba import cuda, types -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_warp_merge_sort_api.py b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_merge_sort_api.py similarity index 97% rename from python/cuda_cccl/tests/coop/test_warp_merge_sort_api.py rename to 
python/cuda_cccl/tests/coop/numba_cuda/test_warp_merge_sort_api.py index 209e641d16e..08b4d2e222a 100644 --- a/python/cuda_cccl/tests/coop/test_warp_merge_sort_api.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_merge_sort_api.py @@ -6,7 +6,7 @@ import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop def test_warp_merge_sort(): diff --git a/python/cuda_cccl/tests/coop/test_warp_reduce.py b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_reduce.py similarity index 98% rename from python/cuda_cccl/tests/coop/test_warp_reduce.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_warp_reduce.py index 7f6ab3956c9..e38f7185be4 100644 --- a/python/cuda_cccl/tests/coop/test_warp_reduce.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_reduce.py @@ -8,7 +8,7 @@ from helpers import NUMBA_TYPES_TO_NP, random_int from numba import cuda, types -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_warp_reduce_api.py b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_reduce_api.py similarity index 97% rename from python/cuda_cccl/tests/coop/test_warp_reduce_api.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_warp_reduce_api.py index eddaa9de23d..2dab0f07e69 100644 --- a/python/cuda_cccl/tests/coop/test_warp_reduce_api.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_reduce_api.py @@ -6,7 +6,7 @@ import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop # example-begin imports numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_warp_scan.py b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_scan.py similarity index 97% rename from python/cuda_cccl/tests/coop/test_warp_scan.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_warp_scan.py index 26eee0f9e20..fe72ab5caa0 100644 --- 
a/python/cuda_cccl/tests/coop/test_warp_scan.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_scan.py @@ -8,7 +8,7 @@ from helpers import NUMBA_TYPES_TO_NP, random_int from numba import cuda, types -from cuda import coop +import cuda.coop.numba_cuda as coop numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/coop/test_warp_scan_api.py b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_scan_api.py similarity index 96% rename from python/cuda_cccl/tests/coop/test_warp_scan_api.py rename to python/cuda_cccl/tests/coop/numba_cuda/test_warp_scan_api.py index f8422289d24..405d5e6433c 100644 --- a/python/cuda_cccl/tests/coop/test_warp_scan_api.py +++ b/python/cuda_cccl/tests/coop/numba_cuda/test_warp_scan_api.py @@ -6,7 +6,7 @@ import numpy as np from numba import cuda -from cuda import coop +import cuda.coop.numba_cuda as coop # example-begin imports numba.config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cuda_cccl/tests/test_examples.py b/python/cuda_cccl/tests/test_examples.py index 3648027b99a..31245964d41 100644 --- a/python/cuda_cccl/tests/test_examples.py +++ b/python/cuda_cccl/tests/test_examples.py @@ -22,10 +22,12 @@ def discover_examples(): tests_dir = Path(__file__).parent examples = [] - # Look for examples in both coop and compute directories - example_directories = ["coop/examples", "compute/examples"] + example_directories = [ + ("Coop Numba CUDA", "coop/numba_cuda/examples"), + ("Compute", "compute/examples"), + ] - for example_dir in example_directories: + for framework, example_dir in example_directories: example_path = tests_dir / example_dir if not example_path.exists(): continue @@ -42,20 +44,17 @@ def discover_examples(): rel_path = python_file.relative_to(tests_dir) # Convert path to module name (OS-agnostic) - # Example: coop/examples/block/reduce.py -> coop.examples.block.reduce + # Example: coop/numba_cuda/examples/block/reduce.py + # -> coop.numba_cuda.examples.block.reduce module_name = 
".".join(rel_path.with_suffix("").parts) # Extract category info for display - parts = rel_path.parts - if len(parts) >= 3: - # e.g., coop/examples/block/reduce.py - framework = parts[0].title() # Coop or Compute - category = parts[2].title() # Block, Warp, Reduction, etc. - filename = parts[3].replace(".py", "").replace("_", " ").title() + parts = python_file.relative_to(example_path).parts + if len(parts) >= 2: + category = parts[0].title() # Block, Warp, Reduction, etc. + filename = parts[1].replace(".py", "").replace("_", " ").title() display_name = f"{framework} - {category} - {filename}" - elif len(parts) >= 2: - # e.g., coop/examples/reduce.py - framework = parts[0].title() + elif len(parts) == 1: filename = parts[-1].replace(".py", "").replace("_", " ").title() display_name = f"{framework} - {filename}" else: