Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libcudacxx/include/cuda/std/__fwd/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ using mask = basic_mask<sizeof(_Tp), __deduce_abi_t<_Tp, _Np>>;
template <typename _Tp, typename _Abi>
struct __simd_storage;

template <typename _Tp, typename _Abi>
template <typename _Tp, typename _Abi, typename = void>
struct __simd_operations;

template <size_t _Bytes, typename _Abi>
Expand Down
1 change: 1 addition & 0 deletions libcudacxx/include/cuda/std/__simd/basic_vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <cuda/std/__simd/concepts.h>
#include <cuda/std/__simd/flag.h>
#include <cuda/std/__simd/iterator.h>
#include <cuda/std/__simd/specializations/fixed_size_float_vec.h>
#include <cuda/std/__simd/specializations/fixed_size_vec.h>
#include <cuda/std/__simd/utility.h>
#include <cuda/std/__type_traits/enable_if.h>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++ in the CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_STD___SIMD_SPECIALIZATIONS_FIXED_SIZE_FLOAT_VEC_H
#define _CUDA_STD___SIMD_SPECIALIZATIONS_FIXED_SIZE_FLOAT_VEC_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__simd/specializations/fixed_size_vec.h>
#include <cuda/std/__simd/specializations/fp32x2_intrinsics.h>
#include <cuda/std/__type_traits/enable_if.h>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_SIMD

// Simd operations for fixed_size ABI with float elements and F32x2 fast paths.
//
// Every operation overridden here follows the same dispatch pattern:
//   1. If `_CCCL_HAS_SIMD_F32X2()` is enabled, the call is not constant-evaluated, and the code is
//      being compiled for exactly SM 100, forward to the matching `*_f32x2` helper.
//   2. Otherwise fall through to the generic implementation inherited from
//      `__fixed_size_operations<float, _Np>`.
// Operations that are not overridden are inherited unchanged from the base.
template <__simd_size_type _Np>
struct __simd_operations<float, __fixed_size<_Np>, enable_if_t<__is_fixed_size_float_v<float, _Np>>>
    : __fixed_size_operations<float, _Np>
{
  using __base       = __fixed_size_operations<float, _Np>;
  using _SimdStorage = __simd_storage<float, __fixed_size<_Np>>;

  // Adds 1.0f to every element of `__s` in place.
  _CCCL_API static constexpr void __increment(_SimdStorage& __s) noexcept
  {
#if _CCCL_HAS_SIMD_F32X2()
    _CCCL_IF_NOT_CONSTEVAL_DEFAULT
    {
      // clang-format off
      NV_IF_TARGET(NV_IS_EXACTLY_SM_100,
                   (constexpr _SimdStorage __one = __base::__broadcast(1.0f);
                    __s = ::cuda::std::simd::__plus_f32x2(__s, __one);
                    return;))
      // clang-format on
    }
#endif // _CCCL_HAS_SIMD_F32X2()
    __base::__increment(__s);
  }

  // Subtracts 1.0f from every element of `__s` in place.
  _CCCL_API static constexpr void __decrement(_SimdStorage& __s) noexcept
  {
#if _CCCL_HAS_SIMD_F32X2()
    _CCCL_IF_NOT_CONSTEVAL_DEFAULT
    {
      // clang-format off
      NV_IF_TARGET(NV_IS_EXACTLY_SM_100,
                   (constexpr _SimdStorage __one = __base::__broadcast(1.0f);
                    __s = ::cuda::std::simd::__minus_f32x2(__s, __one);
                    return;))
      // clang-format on
    }
#endif // _CCCL_HAS_SIMD_F32X2()
    __base::__decrement(__s);
  }

  // Returns the element-wise negation of `__s`.
  //
  // The fast path subtracts from -0.0f rather than +0.0f. Under IEEE-754 round-to-nearest,
  // (+0.0f) - (+0.0f) is +0.0f, but negating +0.0f must produce -0.0f; starting from -0.0f yields
  // the correct sign for both +0.0f and -0.0f inputs and is exact for every other finite value.
  // NOTE(review): assumes `__minus_f32x2` performs element-wise round-to-nearest subtraction -
  // confirm against fp32x2_intrinsics.h.
  [[nodiscard]] _CCCL_API static constexpr _SimdStorage __unary_minus(const _SimdStorage& __s) noexcept
  {
#if _CCCL_HAS_SIMD_F32X2()
    _CCCL_IF_NOT_CONSTEVAL_DEFAULT
    {
      // clang-format off
      NV_IF_TARGET(NV_IS_EXACTLY_SM_100,
                   (constexpr _SimdStorage __neg_zero = __base::__broadcast(-0.0f);
                    return ::cuda::std::simd::__minus_f32x2(__neg_zero, __s);))
      // clang-format on
    }
#endif // _CCCL_HAS_SIMD_F32X2()
    return __base::__unary_minus(__s);
  }

  // Returns the element-wise sum `__lhs + __rhs`.
  [[nodiscard]] _CCCL_API static constexpr _SimdStorage
  __plus(const _SimdStorage& __lhs, const _SimdStorage& __rhs) noexcept
  {
#if _CCCL_HAS_SIMD_F32X2()
    _CCCL_IF_NOT_CONSTEVAL_DEFAULT
    {
      NV_IF_TARGET(NV_IS_EXACTLY_SM_100, (return ::cuda::std::simd::__plus_f32x2(__lhs, __rhs);))
    }
#endif // _CCCL_HAS_SIMD_F32X2()
    return __base::__plus(__lhs, __rhs);
  }

  // Returns the element-wise difference `__lhs - __rhs`.
  [[nodiscard]] _CCCL_API static constexpr _SimdStorage
  __minus(const _SimdStorage& __lhs, const _SimdStorage& __rhs) noexcept
  {
#if _CCCL_HAS_SIMD_F32X2()
    _CCCL_IF_NOT_CONSTEVAL_DEFAULT
    {
      NV_IF_TARGET(NV_IS_EXACTLY_SM_100, (return ::cuda::std::simd::__minus_f32x2(__lhs, __rhs);))
    }
#endif // _CCCL_HAS_SIMD_F32X2()
    return __base::__minus(__lhs, __rhs);
  }

  // Returns the element-wise product `__lhs * __rhs`.
  [[nodiscard]] _CCCL_API static constexpr _SimdStorage
  __multiplies(const _SimdStorage& __lhs, const _SimdStorage& __rhs) noexcept
  {
#if _CCCL_HAS_SIMD_F32X2()
    _CCCL_IF_NOT_CONSTEVAL_DEFAULT
    {
      NV_IF_TARGET(NV_IS_EXACTLY_SM_100, (return ::cuda::std::simd::__multiplies_f32x2(__lhs, __rhs);))
    }
#endif // _CCCL_HAS_SIMD_F32X2()
    return __base::__multiplies(__lhs, __rhs);
  }
};

_CCCL_END_NAMESPACE_CUDA_STD_SIMD

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_STD___SIMD_SPECIALIZATIONS_FIXED_SIZE_FLOAT_VEC_H
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <cuda/__utility/in_range.h>
#include <cuda/std/__cstddef/types.h>
#include <cuda/std/__fwd/simd.h>
#include <cuda/std/__simd/specializations/fixed_size_storage.h>
#include <cuda/std/__type_traits/integral_constant.h>
#include <cuda/std/__utility/integer_sequence.h>

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++ in the CUDA C++ Core Libraries,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_STD___SIMD_SPECIALIZATIONS_FIXED_SIZE_STORAGE_H
#define _CUDA_STD___SIMD_SPECIALIZATIONS_FIXED_SIZE_STORAGE_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/__utility/in_range.h>
#include <cuda/std/__fwd/simd.h>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_SIMD

// Tag type for the fixed_size ABI: carries the element count `_Np` as a compile-time constant.
template <__simd_size_type _Np>
struct __fixed_size
{
  // Number of elements, exposed for code that only has the tag type at hand.
  static constexpr __simd_size_type __simd_size = _Np;

  // An empty or negative-sized simd is rejected at compile time.
  static_assert(_Np > 0, "_Np must be greater than 0");
};

// Element-per-slot simd storage for fixed_size ABI: one `_Tp` per element, held in a plain array.
template <typename _Tp, __simd_size_type _Np>
struct __simd_storage<_Tp, __fixed_size<_Np>>
{
  using value_type = _Tp;

  // Element values; value-initialized by the default member initializer.
  _Tp __data[_Np]{};

  // Returns a copy of the element at position `__idx`.
  // Precondition (checked with _CCCL_ASSERT): 0 <= __idx < _Np.
  [[nodiscard]] _CCCL_API constexpr _Tp __get(const __simd_size_type __idx) const noexcept
  {
    // The bound is written as an explicit half-open range so that __idx == _Np, which is one past
    // the last element of __data, is rejected.
    _CCCL_ASSERT(__idx >= __simd_size_type{0} && __idx < _Np, "Index is out of bounds");
    return __data[__idx];
  }

  // Overwrites the element at position `__idx` with `__v`.
  // Precondition (checked with _CCCL_ASSERT): 0 <= __idx < _Np.
  _CCCL_API constexpr void __set(const __simd_size_type __idx, const _Tp __v) noexcept
  {
    // Same half-open bound as in __get: valid indices are 0 .. _Np - 1.
    _CCCL_ASSERT(__idx >= __simd_size_type{0} && __idx < _Np, "Index is out of bounds");
    __data[__idx] = __v;
  }
};

_CCCL_END_NAMESPACE_CUDA_STD_SIMD

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_STD___SIMD_SPECIALIZATIONS_FIXED_SIZE_STORAGE_H
Original file line number Diff line number Diff line change
Expand Up @@ -21,51 +21,24 @@
# pragma system_header
#endif // no system header

#include <cuda/__utility/in_range.h>
#include <cuda/std/__fwd/simd.h>
#include <cuda/std/__simd/specializations/fixed_size_mask.h>
#include <cuda/std/__simd/specializations/fixed_size_storage.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/integral_constant.h>
#include <cuda/std/__type_traits/is_same.h>
#include <cuda/std/__utility/integer_sequence.h>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_SIMD

template <__simd_size_type _Np>
struct __fixed_size
{
static_assert(_Np > 0, "_Np must be greater than 0");

static constexpr __simd_size_type __simd_size = _Np;
};

// Element-per-slot simd storage for fixed_size ABI
// Simd operations for fixed_size ABI
template <typename _Tp, __simd_size_type _Np>
struct __simd_storage<_Tp, __fixed_size<_Np>>
{
using value_type = _Tp;

_Tp __data[_Np]{};

_CCCL_HIDE_FROM_ABI constexpr __simd_storage() = default;
_CCCL_HIDE_FROM_ABI constexpr __simd_storage(const __simd_storage&) = default;
_CCCL_HIDE_FROM_ABI constexpr __simd_storage& operator=(const __simd_storage&) = default;
inline constexpr bool __is_fixed_size_float_v = is_same_v<_Tp, float> && _Np >= 2;

[[nodiscard]] _CCCL_API constexpr _Tp __get(const __simd_size_type __idx) const noexcept
{
_CCCL_ASSERT(::cuda::in_range(__idx, __simd_size_type{0}, _Np), "Index is out of bounds");
return __data[__idx];
}

_CCCL_API constexpr void __set(const __simd_size_type __idx, const _Tp __v) noexcept
{
_CCCL_ASSERT(::cuda::in_range(__idx, __simd_size_type{0}, _Np), "Index is out of bounds");
__data[__idx] = __v;
}
};

// Simd operations for fixed_size ABI
template <typename _Tp, __simd_size_type _Np>
struct __simd_operations<_Tp, __fixed_size<_Np>>
struct __fixed_size_operations
{
using _SimdStorage = __simd_storage<_Tp, __fixed_size<_Np>>;
using _MaskStorage = __mask_storage<sizeof(_Tp), __fixed_size<_Np>>;
Expand All @@ -90,7 +63,7 @@ struct __simd_operations<_Tp, __fixed_size<_Np>>
((__result.__data[_Is] = __g(integral_constant<__simd_size_type, _Is>())), ...);
return __result;
#else // ^^^ C++20 ^^^ / vvv C++17 vvv
return _SimdStorage{{ __g(integral_constant<__simd_size_type, _Is>())... }};
return _SimdStorage{{__g(integral_constant<__simd_size_type, _Is>())...}};
#endif // _CCCL_STD_VER < 2020
}

Expand Down Expand Up @@ -354,6 +327,13 @@ struct __simd_operations<_Tp, __fixed_size<_Np>>
return __result;
}
};

// Default path (no optimizations): this partial specialization is enabled for every
// <_Tp, _Np> combination for which __is_fixed_size_float_v<_Tp, _Np> is false. It declares no
// members of its own, so all operations come unchanged from __fixed_size_operations<_Tp, _Np>.
template <typename _Tp, __simd_size_type _Np>
struct __simd_operations<_Tp, __fixed_size<_Np>, enable_if_t<!__is_fixed_size_float_v<_Tp, _Np>>>
: __fixed_size_operations<_Tp, _Np>
{};

_CCCL_END_NAMESPACE_CUDA_STD_SIMD

#include <cuda/std/__cccl/epilogue.h>
Expand Down
Loading
Loading