diff --git a/README.md b/README.md index d92026a4c..ac65884eb 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ with older GCC versions. * x86_64 is the main development platform and thoroughly tested. This includes support from SSE-only up to AVX512 on Xeon Phi or Xeon CPUs. * aarch64, arm, and ppc64le was tested and verified to work. No significant - performance evaluation was done. + performance evaluation was done. SVE2 support is in development. * In any case, a fallback to correct execution via builtin arthmetic types is available for all targets. diff --git a/experimental/bits/simd.h b/experimental/bits/simd.h index 4c893114f..383387ec0 100644 --- a/experimental/bits/simd.h +++ b/experimental/bits/simd.h @@ -48,6 +48,10 @@ #include #endif +#ifdef __ARM_FEATURE_SVE +#include +#endif /* __ARM_FEATURE_SVE */ + /* There are several closely related types, with the following naming * convention: * _Tp: vectorizable (arithmetic) type (or any type) @@ -110,17 +114,20 @@ template template using _VecN = _VecBuiltin; -template +template using _Sse = _VecBuiltin<_UsedBytes>; -template - using _Avx = _VecBuiltin<_UsedBytes>; +template +using _Sve2 = _VecBuiltin<_UsedBytes>; + +template +using _Avx = _VecBuiltin<_UsedBytes>; -template - using _Avx512 = _VecBltnBtmsk<_UsedBytes>; +template +using _Avx512 = _VecBltnBtmsk<_UsedBytes>; -template - using _Neon = _VecBuiltin<_UsedBytes>; +template +using _Neon = _VecBuiltin<_UsedBytes>; // implementation-defined: using __sse = _Sse<>; @@ -129,6 +136,9 @@ using __avx512 = _Avx512<>; using __neon = _Neon<>; using __neon128 = _Neon<16>; using __neon64 = _Neon<8>; +using __sve2 = _Sve2<>; +using __sve2_8 = _Sve2<8>; +using __sve2_16 = _Sve2<16>; // standard: template @@ -492,6 +502,8 @@ constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl; constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON; constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32; constexpr inline bool 
__have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64; +constexpr inline bool __have_sve2_a32 = _GLIBCXX_SIMD_HAVE_SVE2_A32; +constexpr inline bool __have_sve2_a64 = _GLIBCXX_SIMD_HAVE_SVE2_A64; constexpr inline bool __support_neon_float = #if defined __GCC_IEC_559 __GCC_IEC_559 == 0; @@ -2262,6 +2274,16 @@ template <> { using type = float64x2_t; }; #endif +#if _GLIBCXX_SIMD_HAVE_SVE2_A64 && !_GLIBCXX_SIMD_HAVE_NEON_A64 /* NOTE(review): these map to the NEON types float64x1_t/float64x2_t, like the block that ends just above; skipped when NEON A64 is enabled so the same specializations are not defined twice - confirm the template arguments */ +template <> + struct __intrinsic_type + { using type = float64x1_t; }; + +template <> + struct __intrinsic_type + { using type = float64x2_t; }; +#endif + #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \ template <> \ struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \ diff --git a/experimental/bits/simd_detail.h b/experimental/bits/simd_detail.h index 1fde7a404..7037c7105 100644 --- a/experimental/bits/simd_detail.h +++ b/experimental/bits/simd_detail.h @@ -64,6 +64,17 @@ #else #define _GLIBCXX_SIMD_HAVE_NEON_A64 0 #endif +#if defined __ARM_FEATURE_SVE2 && (__ARM_ARCH >= 8 || defined __aarch64__) /* SVE2 availability is taken from the ACLE macro __ARM_FEATURE_SVE2, not from __ARM_NEON */ +#define _GLIBCXX_SIMD_HAVE_SVE2_A32 1 +#else +#define _GLIBCXX_SIMD_HAVE_SVE2_A32 0 +#endif +#if defined __ARM_FEATURE_SVE2 && defined __aarch64__ /* ACLE feature macro; same __ARM_FEATURE_ prefix as the __ARM_FEATURE_SVE check guarding the SVE include in simd.h */ +#define _GLIBCXX_SIMD_HAVE_SVE2_A64 1 +#else +#define _GLIBCXX_SIMD_HAVE_SVE2_A64 0 +#endif + //}}} // x86{{{ #ifdef __MMX__