Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
ea3f956
is_trivially_copyable_relaxed
fbusato Apr 1, 2026
9dd60db
add documentation
fbusato Apr 1, 2026
179a81b
describe custom specialization
fbusato Apr 1, 2026
2c33c2e
move to internal function
fbusato Apr 1, 2026
fbade34
address padding
fbusato Apr 1, 2026
deb622c
fix clang
fbusato Apr 1, 2026
a553bb6
do not handle volatile
fbusato Apr 1, 2026
de42a45
unused-local-typedef
fbusato Apr 1, 2026
4e7873d
fix clang pragma
fbusato Apr 1, 2026
6e5021f
simplify conditions
fbusato Apr 2, 2026
8ff54f4
improve documentation
fbusato Apr 2, 2026
c4c1504
fix operator==
fbusato Apr 2, 2026
e603a96
Update docs/libcudacxx/extended_api/type_traits/is_trivially_copyable…
fbusato Apr 6, 2026
cde3d1e
Update docs/libcudacxx/extended_api/type_traits/is_trivially_copyable…
fbusato Apr 6, 2026
c20fb89
add recursive struct check
fbusato Apr 7, 2026
35f9d15
add comment
fbusato Apr 7, 2026
4db130a
Merge branch 'relaxed-type-traits' of github.com:fbusato/cccl into re…
fbusato Apr 7, 2026
1f6254c
fix nvrtc
fbusato Apr 7, 2026
ab184ff
rename to cuda::is_trivially_copyable
fbusato Apr 7, 2026
6c5f19e
update documentation
fbusato Apr 8, 2026
79f4310
test nvfp only in CUDA >= 12.3
fbusato Apr 8, 2026
4506e40
update bit_cast implementation
fbusato Apr 8, 2026
80b09fa
add documentation
fbusato Apr 8, 2026
cd776c9
fix compile warnings/errors
fbusato Apr 8, 2026
d342544
Update libcudacxx/include/cuda/std/__bit/bit_cast.h
fbusato Apr 9, 2026
d491743
fix license, inline
fbusato Apr 9, 2026
4da9f32
Merge branch 'relaxed-type-traits' of github.com:fbusato/cccl into re…
fbusato Apr 9, 2026
c23d451
add complex support and improve bit_cast
fbusato Apr 9, 2026
1065c84
move __builtin_structured_binding_size to builtin.h
fbusato Apr 9, 2026
4450c1d
__aggregate_all_of
fbusato Apr 9, 2026
fc773e0
Merge remote-tracking branch 'origin/aggregate_members_all_of' into r…
fbusato Apr 9, 2026
5f1fb0c
revert builtin.h
fbusato Apr 9, 2026
ce37dab
add missing is_trivially_default_constructible
fbusato Apr 9, 2026
352b6b3
skip MSVC unsupported cases
fbusato Apr 9, 2026
ceb8393
fix headers
fbusato Apr 9, 2026
4b8e70d
Merge remote-tracking branch 'origin/aggregate_members_all_of' into r…
fbusato Apr 10, 2026
6813780
fix complex and removed user specialization
fbusato Apr 10, 2026
6025190
add half/bfloat guard
fbusato Apr 10, 2026
e432b8d
protect __half/bfloat16
fbusato Apr 10, 2026
6f49e7b
Merge branch 'main' into relaxed-type-traits
fbusato Apr 29, 2026
55b641f
align with is_bitwise_comparable
fbusato Apr 30, 2026
d328eb1
Merge branch 'main' into relaxed-type-traits
fbusato Apr 30, 2026
26f9e6c
update implementation and unit tests
fbusato Apr 30, 2026
3c62e3f
apply cuda::is_trivially_copyable to all files
fbusato Apr 30, 2026
4bc8737
split bit_cast unit test
fbusato Apr 30, 2026
38d3419
formatting
fbusato Apr 30, 2026
9e76b3f
update documentation
fbusato Apr 30, 2026
c5d1c4a
fix CI failures
fbusato May 1, 2026
a44c391
add TEST_FUNC
fbusato May 1, 2026
7addbc1
update doc
fbusato May 1, 2026
7fefe97
pre-commit
fbusato May 1, 2026
b40820c
exclude default_initializable for GCC7
fbusato May 1, 2026
eed543a
Merge branch 'main' into relaxed-type-traits
fbusato May 1, 2026
55402fe
fix doc
fbusato May 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions c/parallel/src/transform.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <cub/util_temporary_storage.cuh>
#include <cub/util_type.cuh>

#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/cstdint>
#include <cuda/std/memory>

Expand Down Expand Up @@ -327,7 +328,7 @@ static_assert(device_transform_policy()(detail::current_tuning_cc()) == {9}, "Ho
build_ptr->cache = new transform::cache();

// avoid new and delete which requires the allocated and freed types to match
static_assert(std::is_trivially_copyable_v<decltype(policy_sel)>);
static_assert(::cuda::is_trivially_copyable_v<decltype(policy_sel)>);
build_ptr->runtime_policy = std::malloc(sizeof(policy_sel));
std::memcpy(build_ptr->runtime_policy, &policy_sel, sizeof(policy_sel));

Expand Down Expand Up @@ -528,7 +529,7 @@ static_assert(device_transform_policy()(detail::current_tuning_cc()) == {12}, "H
build_ptr->cache = new transform::cache();

// avoid new and delete which requires the allocated and freed types to match
static_assert(std::is_trivially_copyable_v<decltype(policy_sel)>);
static_assert(::cuda::is_trivially_copyable_v<decltype(policy_sel)>);
build_ptr->runtime_policy = std::malloc(sizeof(policy_sel));
std::memcpy(build_ptr->runtime_policy, &policy_sel, sizeof(policy_sel));

Expand Down
10 changes: 5 additions & 5 deletions cub/cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
#include <cub/util_temporary_storage.cuh>
#include <cub/warp/warp_reduce.cuh>

#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__type_traits/conditional.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/is_trivially_copyable.h>

#include <nv/target>

Expand Down Expand Up @@ -473,7 +473,7 @@ using default_no_delay_t = default_no_delay_constructor_t::delay_t;
template <class T>
using default_delay_constructor_t =
// TODO(bgruber): remove the check for is_primitive<ValueT> in CCCL 4.0
::cuda::std::conditional_t<is_primitive<T>::value || ::cuda::std::is_trivially_copyable_v<T>,
::cuda::std::conditional_t<is_primitive<T>::value || ::cuda::is_trivially_copyable_v<T>,
fixed_delay_constructor_t<350, 450>,
default_no_delay_constructor_t>;

Expand All @@ -483,7 +483,7 @@ using default_delay_t = typename default_delay_constructor_t<T>::delay_t;
template <class KeyT, class ValueT>
using default_reduce_by_key_delay_constructor_t =
// TODO(bgruber): remove the check for is_primitive<ValueT> in CCCL 4.0
::cuda::std::conditional_t<(is_primitive<ValueT>::value || ::cuda::std::is_trivially_copyable_v<ValueT>)
::cuda::std::conditional_t<(is_primitive<ValueT>::value || ::cuda::is_trivially_copyable_v<ValueT>)
&& (sizeof(ValueT) + sizeof(KeyT) < largest_atomic_message_size),
reduce_by_key_delay_constructor_t<350, 450>,
default_delay_constructor_t<KeyValuePair<KeyT, ValueT>>>;
Expand Down Expand Up @@ -582,7 +582,7 @@ _CCCL_HOST_DEVICE _CCCL_FORCEINLINE cudaError_t tile_state_init(
template <typename T,
// TODO(bgruber): remove the check for is_primitive<T> in CCCL 4.0
bool SingleWord = detail::is_primitive<T>::value
|| (::cuda::std::is_trivially_copyable_v<T>
|| (::cuda::is_trivially_copyable_v<T>
&& sizeof(T) < detail::largest_atomic_message_size
// TODO(bgruber): a power of two size is not strictly necessary, but the implementation
// cannot handle it currently. For example, we could support status word + int3.
Expand Down Expand Up @@ -957,7 +957,7 @@ struct ScanTileState<T, false>
template <typename ValueT,
typename KeyT,
// TODO(bgruber): remove the check for is_primitive<ValueT> in CCCL 4.0
bool SingleWord = (detail::is_primitive<ValueT>::value || ::cuda::std::is_trivially_copyable_v<ValueT>)
bool SingleWord = (detail::is_primitive<ValueT>::value || ::cuda::is_trivially_copyable_v<ValueT>)
&& (sizeof(ValueT) + sizeof(KeyT) < detail::largest_atomic_message_size)>
struct ReduceByKeyScanTileState;

Expand Down
6 changes: 3 additions & 3 deletions cub/cub/detail/uninitialized_copy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
# pragma system_header
#endif // no system header

#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__new/device_new.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__utility/forward.h>

CUB_NAMESPACE_BEGIN
Expand All @@ -30,14 +30,14 @@ _CCCL_HOST_DEVICE void uninitialized_copy_single(T* ptr, U&& val)
new (ptr) T(::cuda::std::forward<U>(val));
}
#else // ^^^ _CCCL_CUDA_COMPILER(NVHPC) ^^^ / vvv !_CCCL_CUDA_COMPILER(NVHPC) vvv
template <typename T, typename U, ::cuda::std::enable_if_t<::cuda::std::is_trivially_copyable_v<T>, int> = 0>
template <typename T, typename U, ::cuda::std::enable_if_t<::cuda::is_trivially_copyable_v<T>, int> = 0>
_CCCL_HOST_DEVICE void uninitialized_copy_single(T* ptr, U&& val)
{
// gevtushenko: placement new should work here as well, but the code generated for copy assignment is sometimes better
*ptr = ::cuda::std::forward<U>(val);
}

template <typename T, typename U, ::cuda::std::enable_if_t<!::cuda::std::is_trivially_copyable_v<T>, int> = 0>
template <typename T, typename U, ::cuda::std::enable_if_t<!::cuda::is_trivially_copyable_v<T>, int> = 0>
_CCCL_HOST_DEVICE void uninitialized_copy_single(T* ptr, U&& val)
{
new (ptr) T(::cuda::std::forward<U>(val));
Expand Down
5 changes: 3 additions & 2 deletions cub/cub/detail/warpspeed/look_ahead.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cuda/__functional/operator_properties.h>
#include <cuda/__memory/is_aligned.h>
#include <cuda/__ptx/instructions/get_sreg.h>
#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__bit/popcount.h>
#include <cuda/std/__type_traits/underlying_type.h>

Expand Down Expand Up @@ -75,7 +76,7 @@ storeTileAggregate(tile_state_t<AccumT>* ptrTileStates, scan_state scanState, Ac
_CCCL_ASSERT(index >= 0 && index < gridDim.x, "Reading out of bounds tile state");

if constexpr (sizeof(tile_state_t<AccumT>) <= cub::detail::warpspeed::max_native_atomic_size()
&& ::cuda::std::is_trivially_copyable_v<tile_state_t<AccumT>>)
&& ::cuda::is_trivially_copyable_v<tile_state_t<AccumT>>)
{
static_assert(::cuda::is_power_of_two(sizeof(tile_state_t<AccumT>)));
tile_state_t<AccumT> tmp{scanState, sum};
Expand Down Expand Up @@ -103,7 +104,7 @@ _CCCL_DEVICE_API tile_state_t<AccumT> loadTileAggregate(tile_state_t<AccumT>* pt

tile_state_t<AccumT> res;
if constexpr (sizeof(tile_state_t<AccumT>) <= cub::detail::warpspeed::max_native_atomic_size()
&& ::cuda::std::is_trivially_copyable_v<tile_state_t<AccumT>>)
&& ::cuda::is_trivially_copyable_v<tile_state_t<AccumT>>)
{
static_assert(::cuda::is_power_of_two(sizeof(tile_state_t<AccumT>)));
# if _CCCL_HAS_NV_ATOMIC_BUILTINS()
Expand Down
3 changes: 2 additions & 1 deletion cub/cub/device/dispatch/kernels/kernel_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cub/grid/grid_queue.cuh>
#include <cub/util_arch.cuh>

#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__numeric/reduce.h>

CUB_NAMESPACE_BEGIN
Expand Down Expand Up @@ -80,7 +81,7 @@ struct Transforms
static_assert(::cuda::std::is_convertible_v<CommonT, int>,
"The common type of `LevelT` and `SampleT` must be "
"convertible to `int`.");
static_assert(::cuda::std::is_trivially_copyable_v<CommonT>,
static_assert(::cuda::is_trivially_copyable_v<CommonT>,
"The common type of `LevelT` and `SampleT` must be "
"trivially copyable.");

Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@

#include <cuda/__cmath/ceil_div.h>
#include <cuda/__device/compute_capability.h>
#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__algorithm/clamp.h>
#include <cuda/std/__host_stdlib/ostream>
#include <cuda/std/__type_traits/is_trivially_copyable.h>
#include <cuda/std/concepts>

CUB_NAMESPACE_BEGIN
Expand Down Expand Up @@ -1657,7 +1657,7 @@ struct policy_selector_from_types
int{sizeof(AccumT)},
classify_type<AccumT>,
is_primitive_v<KeyT>,
::cuda::std::is_trivially_copyable_v<KeyT>,
::cuda::is_trivially_copyable_v<KeyT>,
is_primitive_v<AccumT>,
basic_binary_op_t<ReductionOpT>::value}(cc);
}
Expand Down
3 changes: 2 additions & 1 deletion cub/cub/device/dispatch/tuning/tuning_rle_encode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <cuda/__cmath/ceil_div.h>
#include <cuda/__device/compute_capability.h>
#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__algorithm/clamp.h>
#include <cuda/std/__algorithm/max.h>
#include <cuda/std/__host_stdlib/ostream>
Expand Down Expand Up @@ -525,7 +526,7 @@ struct policy_selector_from_types
int{sizeof(KeyT)},
classify_type<KeyT>,
is_primitive_v<LengthT>,
::cuda::std::is_trivially_copyable_v<LengthT>,
::cuda::is_trivially_copyable_v<LengthT>,
is_primitive_v<KeyT>};
return selector(cc);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <cub/util_device.cuh>

#include <cuda/__device/compute_capability.h>
#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__algorithm/clamp.h>
#include <cuda/std/__host_stdlib/ostream>
#include <cuda/std/concepts>
Expand Down Expand Up @@ -612,7 +613,7 @@ struct policy_selector_from_types
classify_type<KeyT>,
is_primitive_v<LengthT>,
is_primitive_v<KeyT>,
::cuda::std::is_trivially_copyable_v<KeyT>};
::cuda::is_trivially_copyable_v<KeyT>};
return selector(cc);
}
};
Expand Down
5 changes: 3 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <thrust/type_traits/is_contiguous_iterator.h>

#include <cuda/__device/compute_capability.h>
#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__algorithm/max.h>
#include <cuda/std/__functional/invoke.h>
#include <cuda/std/__functional/operations.h>
Expand Down Expand Up @@ -1451,8 +1452,8 @@ struct policy_selector_from_types
classify_op<ScanOpT>,
THRUST_NS_QUALIFIER::is_contiguous_iterator_v<InputIteratorT>,
THRUST_NS_QUALIFIER::is_contiguous_iterator_v<OutputIteratorT>,
::cuda::std::is_trivially_copyable_v<InputValueT>,
::cuda::std::is_trivially_copyable_v<OutputValueT>,
::cuda::is_trivially_copyable_v<InputValueT>,
::cuda::is_trivially_copyable_v<OutputValueT>,
::cuda::std::is_default_constructible_v<OutputValueT>,
accum_is_primitive_or_trivially_copy_constructible,
benchmark_match};
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
#include <cub/util_type.cuh>

#include <cuda/__device/compute_capability.h>
#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__algorithm/max.h>
#include <cuda/std/__host_stdlib/ostream>
#include <cuda/std/__type_traits/is_trivially_copyable.h>

CUB_NAMESPACE_BEGIN

Expand Down Expand Up @@ -1951,7 +1951,7 @@ struct policy_selector_from_types
static_cast<int>(sizeof(ValueT)),
static_cast<int>(sizeof(AccumT)),
is_primitive<ValueT>::value,
::cuda::std::is_trivially_copyable_v<ValueT>,
::cuda::is_trivially_copyable_v<ValueT>,
classify_type<KeyT>,
classify_type<ValueT>,
classify_type<AccumT>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
# include <cuda/__runtime/api_wrapper.h>
# include <cuda/__stream/invalid_stream.h>
# include <cuda/__stream/stream_ref.h>
# include <cuda/__type_traits/is_trivially_copyable.h>
# include <cuda/__utility/no_init.h>
# include <cuda/std/__type_traits/is_trivially_copyable.h>
# include <cuda/std/__utility/exchange.h>
# include <cuda/std/__utility/move.h>
# include <cuda/std/cstddef>
Expand Down Expand Up @@ -68,7 +68,7 @@ namespace cuda::experimental
template <class _Tp>
class graph_buffer
{
static_assert(::cuda::std::is_trivially_copyable_v<_Tp>, "graph_buffer requires T to be trivially copyable.");
static_assert(::cuda::is_trivially_copyable_v<_Tp>, "graph_buffer requires T to be trivially copyable.");

public:
using value_type = _Tp;
Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__copy/mdspan_d2d.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
# include <cuda/__mdspan/host_device_mdspan.h>
# include <cuda/__mdspan/traits.h>
# include <cuda/__stream/stream_ref.h>
# include <cuda/__type_traits/is_trivially_copyable.h>
# include <cuda/std/__algorithm/max.h>
# include <cuda/std/__functional/identity.h>
# include <cuda/std/__host_stdlib/stdexcept>
Expand All @@ -43,7 +44,6 @@
# include <cuda/std/__type_traits/is_const.h>
# include <cuda/std/__type_traits/is_convertible.h>
# include <cuda/std/__type_traits/is_same.h>
# include <cuda/std/__type_traits/is_trivially_copyable.h>
# include <cuda/std/__type_traits/remove_cvref.h>

# include <cuda/experimental/__copy/copy_contiguous.cuh>
Expand Down Expand Up @@ -125,7 +125,7 @@ _CCCL_HOST_API void copy(::cuda::device_mdspan<_TpIn, _ExtentsIn, _LayoutPolicyI
&& ::cuda::std::is_convertible_v<_AccessorPolicyOut, __default_accessor_out>;
constexpr bool __are_byte_copyable =
::cuda::std::is_same_v<::cuda::std::remove_cv_t<_TpIn>, ::cuda::std::remove_cv_t<_TpOut>>
&& ::cuda::std::is_trivially_copyable_v<_TpIn> //
&& ::cuda::is_trivially_copyable_v<_TpIn> //
&& __have_default_accessors;

if (__tensor_size == 1 && __are_byte_copyable)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
# include <cuda/__mdspan/host_device_mdspan.h>
# include <cuda/__mdspan/traits.h>
# include <cuda/__stream/stream_ref.h>
# include <cuda/__type_traits/is_trivially_copyable.h>
# include <cuda/std/__algorithm/max.h>
# include <cuda/std/__cstddef/types.h>
# include <cuda/std/__host_stdlib/stdexcept>
Expand All @@ -37,7 +38,6 @@
# include <cuda/std/__type_traits/is_const.h>
# include <cuda/std/__type_traits/is_convertible.h>
# include <cuda/std/__type_traits/is_same.h>
# include <cuda/std/__type_traits/is_trivially_copyable.h>
# include <cuda/std/__type_traits/remove_cv.h>

# include <cuda/experimental/__copy_bytes/memcpy_batch_tiles.cuh>
Expand Down Expand Up @@ -74,7 +74,7 @@ _CCCL_HOST_API void __copy_bytes_impl(
namespace cudax = ::cuda::experimental;
static_assert(::cuda::std::is_same_v<::cuda::std::remove_cv_t<_TpIn>, ::cuda::std::remove_cv_t<_TpOut>>,
"cudax::copy_bytes: TpIn and TpOut must be the same type");
static_assert(::cuda::std::is_trivially_copyable_v<_TpIn>, "TpIn must be trivially copyable");
static_assert(::cuda::is_trivially_copyable_v<_TpIn>, "TpIn must be trivially copyable");
static_assert(!::cuda::std::is_const_v<_TpOut>, "TpOut must not be const");
static_assert(::cuda::__is_cuda_mdspan_layout_v<_LayoutPolicyIn>,
"cudax::copy_bytes: LayoutPolicyIn must be a predefined layout policy");
Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__graph/copy_bytes.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
# include <cuda/__algorithm/common.h>
# include <cuda/__stream/launch_transform.h>
# include <cuda/__stream/stream_ref.h>
# include <cuda/__type_traits/is_trivially_copyable.h>
# include <cuda/std/__concepts/concept_macros.h>
# include <cuda/std/__exception/exception_macros.h>
# include <cuda/std/__host_stdlib/stdexcept>
# include <cuda/std/__type_traits/is_const.h>
# include <cuda/std/__type_traits/is_trivially_copyable.h>
# include <cuda/std/cstddef>
# include <cuda/std/span>

Expand All @@ -47,7 +47,7 @@ _CCCL_HOST_API graph_node_ref
__copy_bytes_graph_impl(path_builder& __pb, ::cuda::std::span<_SrcTy> __src, ::cuda::std::span<_DstTy> __dst)
{
static_assert(!::cuda::std::is_const_v<_DstTy>, "Copy destination can't be const");
static_assert(::cuda::std::is_trivially_copyable_v<_SrcTy> && ::cuda::std::is_trivially_copyable_v<_DstTy>,
static_assert(::cuda::is_trivially_copyable_v<_SrcTy> && ::cuda::is_trivially_copyable_v<_DstTy>,
"Copy source and destination element types must be trivially copyable");

if (__src.size_bytes() > __dst.size_bytes())
Expand Down
5 changes: 2 additions & 3 deletions cudax/include/cuda/experimental/__graph/fill_bytes.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
# include <cuda/__algorithm/common.h>
# include <cuda/__stream/launch_transform.h>
# include <cuda/__stream/stream_ref.h>
# include <cuda/__type_traits/is_trivially_copyable.h>
# include <cuda/std/__concepts/concept_macros.h>
# include <cuda/std/__exception/exception_macros.h>
# include <cuda/std/__host_stdlib/stdexcept>
# include <cuda/std/__type_traits/is_const.h>
# include <cuda/std/__type_traits/is_trivially_copyable.h>
# include <cuda/std/cstdint>
# include <cuda/std/span>

Expand All @@ -48,8 +48,7 @@ _CCCL_HOST_API graph_node_ref
__fill_bytes_graph_impl(path_builder& __pb, ::cuda::std::span<_DstTy, _DstSize> __dst, ::cuda::std::uint8_t __value)
{
static_assert(!::cuda::std::is_const_v<_DstTy>, "Fill destination can't be const");
static_assert(::cuda::std::is_trivially_copyable_v<_DstTy>,
"Fill destination element type must be trivially copyable");
static_assert(::cuda::is_trivially_copyable_v<_DstTy>, "Fill destination element type must be trivially copyable");

auto __deps = __pb.get_dependencies();
::CUgraphNodeParams __params{};
Expand Down
4 changes: 2 additions & 2 deletions cudax/include/cuda/experimental/__kernel/kernel_ref.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
#include <cuda/__driver/driver_api.h>
#include <cuda/__memory/address_space.h>
#include <cuda/__runtime/api_wrapper.h>
#include <cuda/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__type_traits/always_false.h>
#include <cuda/std/__type_traits/is_trivially_copyable.h>
#include <cuda/std/__utility/forward.h>
#include <cuda/std/string_view>

Expand All @@ -51,7 +51,7 @@ class kernel_ref
template <class... _Args>
class kernel_ref<void(_Args...)>
{
static_assert((true && ... && ::cuda::std::is_trivially_copyable_v<_Args>),
static_assert((true && ... && ::cuda::is_trivially_copyable_v<_Args>),
"All kernel_ref argument types must be trivially copyable.");

public:
Expand Down
6 changes: 6 additions & 0 deletions docs/libcudacxx/extended_api/type_traits.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Type traits
:maxdepth: 1

type_traits/is_floating_point
type_traits/is_trivially_copyable
type_traits/is_bitwise_comparable
type_traits/vector_types

Expand All @@ -30,6 +31,11 @@ Type traits
- CCCL 3.3.0
- CUDA 13.3

* - :ref:`cuda::is_trivially_copyable <libcudacxx-extended-api-type_traits-is_trivially_copyable>`
- Relaxed trivially copyable check including extended floating-point types
- CCCL 3.4.0
- CUDA 13.4

* - :ref:`cuda::is_bitwise_comparable <libcudacxx-extended-api-type_traits-is_bitwise_comparable>`
- User-specializable bitwise comparability check
- CCCL 3.4.0
Expand Down
Loading
Loading