Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion cudax/include/cuda/experimental/__group/fwd.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class group;

// mappings

template <::cuda::std::size_t _Np = ::cuda::std::dynamic_extent, bool _IsExhaustive = true>
template <::cuda::std::size_t _Count = ::cuda::std::dynamic_extent, bool _IsExhaustive = true>
class group_by;

template <class _Data, bool _IsExahustive>
Expand Down Expand Up @@ -97,6 +97,13 @@ inline constexpr bool __is_this_group_v<this_cluster<_Hierarchy>> = true;
template <class _Hierarchy>
inline constexpr bool __is_this_group_v<this_grid<_Hierarchy>> = true;

// Compile-time flag telling whether _Tp is one of the group mapping class templates declared in this header.
// Primary template: every type is rejected unless one of the partial specializations below matches.
template <class _Tp>
inline constexpr bool __is_group_mapping_v = false;
// Every specialization of group_by is a group mapping.
template <::cuda::std::size_t _Count, bool _IsExhaustive>
inline constexpr bool __is_group_mapping_v<group_by<_Count, _IsExhaustive>> = true;
// Every specialization of group_as is a group mapping.
template <class _Data, bool _IsExhaustive>
inline constexpr bool __is_group_mapping_v<group_as<_Data, _IsExhaustive>> = true;

// tags

struct non_exhaustive_t;
Expand Down
32 changes: 21 additions & 11 deletions cudax/include/cuda/experimental/__group/group.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include <cuda/experimental/__group/concepts.cuh>
#include <cuda/experimental/__group/fwd.cuh>
#include <cuda/experimental/__group/mapping/group_by.cuh>
#include <cuda/experimental/__group/mapping/mapping_result.cuh>
#include <cuda/experimental/__group/this_group.cuh>
#include <cuda/experimental/__group/traits.cuh>

Expand All @@ -57,8 +58,26 @@ class group

// todo(dabayer): static_assert that _Unit is (under) typename _ParentGroup::unit_type

// Builds the mapping result that is passed as the "previous result" to the first mapping of a group.
//
// The static properties (static unit count, always-exhaustive, always-contiguous) are taken from the parent
// group, the runtime count and rank are queried from __parent for _Unit.
// NOTE(review): the leading 1 / 0 are presumably "one group" and "group rank 0" - confirm against
// __mapping_result.
[[nodiscard]] _CCCL_DEVICE_API static constexpr auto
__get_initial_mapping_result(const _ParentGroup& __parent) noexcept
{
  using _ParentResult  = typename _ParentGroup::__mapping_result_type;
  using _InitialResult = ::cuda::experimental::__mapping_result<
    1,
    ::cuda::experimental::__static_count_query_group<_Unit, _ParentGroup>(),
    _ParentResult::is_always_exhaustive(),
    _ParentResult::is_always_contiguous()>;

  // Query the count first and the rank second, matching the order of the constructor arguments.
  const auto __unit_count = ::cuda::experimental::__count_query_group<unsigned, _Unit>(__parent);
  const auto __unit_rank  = ::cuda::experimental::__rank_query_group<unsigned, _Unit>(__parent);

  return _InitialResult{1, 0, __unit_count, __unit_rank};
}

using _ParentMappingResult = typename _ParentGroup::__mapping_result_type;
using _MappingResult = __group_mapping_result_t<_Mapping, _Unit, _ParentGroup>;
using _MappingResult = decltype(::cuda::std::declval<const _Mapping&>().map(
::cuda::std::declval<const _ParentGroup&>(),
__get_initial_mapping_result(::cuda::std::declval<const _ParentGroup&>())));
using _SynchronizerInstance =
__group_synchronizer_instance_t<_Synchronizer, _Unit, _ParentGroup, _Mapping, _MappingResult>;
static_assert(__group_mapping_result<_MappingResult>);
Expand All @@ -72,16 +91,7 @@ class group
[[nodiscard]] _CCCL_DEVICE_API static _MappingResult
__do_mapping(const _Mapping& __mapping, const _ParentGroup& __parent) noexcept
{
// Do not invoke the mapping for threads that are not part of the parent group.
if constexpr (!_ParentMappingResult::is_always_exhaustive())
{
if (!__parent.__mapping_result().is_valid())
{
return _MappingResult::invalid();
}
}

const auto __mapping_result = __mapping.map(_Unit{}, __parent);
const auto __mapping_result = __mapping.map(__parent, __get_initial_mapping_result(__parent));
if (__mapping_result.is_valid())
{
_CCCL_ASSERT(__mapping_result.group_rank() < __mapping_result.group_count(), "invalid group rank");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
//===----------------------------------------------------------------------===//
//
// Part of CUDA Experimental in CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_EXPERIMENTAL___GROUP_MAPPING_COMPOSITE_MAPPING_CUH
#define _CUDA_EXPERIMENTAL___GROUP_MAPPING_COMPOSITE_MAPPING_CUH

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__type_traits/fold.h>
#include <cuda/std/__type_traits/is_nothrow_copy_constructible.h>
#include <cuda/std/tuple>

#include <cuda/experimental/__group/fwd.cuh>
#include <cuda/experimental/__group/queries.cuh>
#include <cuda/experimental/__group/traits.cuh>

#include <cuda/std/__cccl/prologue.h>

#if !defined(_CCCL_DOXYGEN_INVOKED)

// todo(dabayer): do we want to always use uint32_t for all counts/ranks?

namespace cuda::experimental
{
// Composition of several group mappings that are applied one after another.
//
// The mappings are stored by value in the order in which they were passed to the constructor. map() invokes
// the first mapping with the caller-provided previous mapping result, feeds its result into the second
// mapping, and so on; the result of the last mapping is returned.
template <class... _Mappings>
class composite_mapping
{
  // Stored mappings, in application order.
  ::cuda::std::tuple<_Mappings...> __mappings_;

  // Applies the mapping at index _Ip to __prev_mapping_result and recurses over the remaining mappings.
  // The return type is the result type of the last mapping in the chain.
  template <::cuda::std::size_t _Ip = 0, class _ParentGroup, class _PrevMappingResult>
  [[nodiscard]] _CCCL_DEVICE_API constexpr auto
  __map_impl(const _ParentGroup& __parent, const _PrevMappingResult& __prev_mapping_result) const noexcept
  {
    // An empty composite_mapping<> can be constructed, but there is nothing to apply. Diagnose this here
    // instead of failing inside tuple's get<0> with an unrelated error message.
    static_assert(_Ip < sizeof...(_Mappings), "composite_mapping::map requires at least one mapping");

    const auto __result = ::cuda::std::get<_Ip>(__mappings_).map(__parent, __prev_mapping_result);
    if constexpr (_Ip + 1 < sizeof...(_Mappings))
    {
      // More mappings follow: the result just computed becomes their "previous" result.
      return __map_impl<_Ip + 1>(__parent, __result);
    }
    else
    {
      return __result;
    }
  }

public:
  // Copies every mapping into the composite. noexcept iff all mappings are nothrow copy constructible.
  _CCCL_DEVICE_API constexpr composite_mapping(const _Mappings&... __mappings) noexcept(
    ::cuda::std::__fold_and_v<::cuda::std::is_nothrow_copy_constructible_v<_Mappings>...>)
      : __mappings_{__mappings...}
  {}

  // Returns the stored mappings as a tuple, in application order.
  [[nodiscard]] _CCCL_DEVICE_API constexpr const ::cuda::std::tuple<_Mappings...>& get() const noexcept
  {
    return __mappings_;
  }

  // Runs all stored mappings in order, starting from __prev_mapping_result, and returns the result of the
  // last one. Every stored mapping is called as `.map(__parent, <result of the preceding step>)`.
  template <class _ParentGroup, class _PrevMappingResult>
  [[nodiscard]] _CCCL_DEVICE_API constexpr auto
  map(const _ParentGroup& __parent, const _PrevMappingResult& __prev_mapping_result) const noexcept
  {
    return __map_impl(__parent, __prev_mapping_result);
  }
};

// Deduction guide: constructing a composite_mapping from a list of mappings deduces _Mappings... as the
// (decayed) argument types, one per argument and in the same order.
template <class... _Mappings>
_CCCL_DEVICE composite_mapping(const _Mappings&...) -> composite_mapping<_Mappings...>;

// Pipes two plain group mappings into a two-element composite_mapping; __lhs is applied first, __rhs second.
// Only participates when both operands satisfy __is_group_mapping_v.
_CCCL_TEMPLATE(class _Lhs, class _Rhs)
_CCCL_REQUIRES(__is_group_mapping_v<_Lhs> _CCCL_AND __is_group_mapping_v<_Rhs>)
[[nodiscard]] _CCCL_DEVICE_API constexpr composite_mapping<_Lhs, _Rhs>
operator|(const _Lhs& __lhs, const _Rhs& __rhs) noexcept(
  ::cuda::std::is_nothrow_constructible_v<composite_mapping<_Lhs, _Rhs>, const _Lhs&, const _Rhs&>)
{
  using _Composite = composite_mapping<_Lhs, _Rhs>;
  return _Composite{__lhs, __rhs};
}

// Appends a plain group mapping to an existing composite: the result holds all mappings of __lhs followed by
// __rhs, as one flat composite_mapping (no nesting).
_CCCL_TEMPLATE(class... _LhsMappings, class _Rhs)
_CCCL_REQUIRES(__is_group_mapping_v<_Rhs>)
[[nodiscard]] _CCCL_DEVICE_API constexpr composite_mapping<_LhsMappings..., _Rhs>
operator|(const composite_mapping<_LhsMappings...>& __lhs, const _Rhs& __rhs) noexcept(
  ::cuda::std::is_nothrow_constructible_v<composite_mapping<_LhsMappings..., _Rhs>, const _LhsMappings&..., const _Rhs&>)
{
  // Receives the unpacked mappings of __lhs and rebuilds a composite with __rhs placed last.
  const auto __append_rhs = [&__rhs](const auto&... __heads) {
    return composite_mapping{__heads..., __rhs};
  };
  return ::cuda::std::apply(__append_rhs, __lhs.get());
}

// Prepends a plain group mapping to an existing composite: the result holds __lhs followed by all mappings
// of __rhs, as one flat composite_mapping (no nesting).
_CCCL_TEMPLATE(class _Lhs, class... _RhsMappings)
_CCCL_REQUIRES(__is_group_mapping_v<_Lhs>)
[[nodiscard]] _CCCL_DEVICE_API constexpr composite_mapping<_Lhs, _RhsMappings...>
operator|(const _Lhs& __lhs, const composite_mapping<_RhsMappings...>& __rhs) noexcept(
  ::cuda::std::is_nothrow_constructible_v<composite_mapping<_Lhs, _RhsMappings...>, const _Lhs&, const _RhsMappings&...>)
{
  // Receives the unpacked mappings of __rhs and rebuilds a composite with __lhs placed first.
  const auto __prepend_lhs = [&__lhs](const auto&... __tails) {
    return composite_mapping{__lhs, __tails...};
  };
  return ::cuda::std::apply(__prepend_lhs, __rhs.get());
}

// Concatenates two composites: the result holds all mappings of __lhs followed by all mappings of __rhs, as
// one flat composite_mapping (no nesting).
template <class... _LhsMappings, class... _RhsMappings>
[[nodiscard]] _CCCL_DEVICE_API constexpr composite_mapping<_LhsMappings..., _RhsMappings...>
operator|(const composite_mapping<_LhsMappings...>& __lhs, const composite_mapping<_RhsMappings...>& __rhs) noexcept(
  ::cuda::std::is_nothrow_constructible_v<composite_mapping<_LhsMappings..., _RhsMappings...>,
                                          const _LhsMappings&...,
                                          const _RhsMappings&...>)
{
  // Two-level unpacking: the outer closure receives the mappings of __lhs, the inner one those of __rhs.
  const auto __with_lhs = [&__rhs](const auto&... __heads) {
    const auto __with_rhs = [&](const auto&... __tails) {
      return composite_mapping{__heads..., __tails...};
    };
    return ::cuda::std::apply(__with_rhs, __rhs.get());
  };
  return ::cuda::std::apply(__with_lhs, __lhs.get());
}
} // namespace cuda::experimental

#endif // !_CCCL_DOXYGEN_INVOKED

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_EXPERIMENTAL___GROUP_MAPPING_COMPOSITE_MAPPING_CUH
Loading
Loading