diff --git a/include/boost/int128/charconv.hpp b/include/boost/int128/charconv.hpp index 9171cae7..f8176f15 100644 --- a/include/boost/int128/charconv.hpp +++ b/include/boost/int128/charconv.hpp @@ -38,6 +38,24 @@ struct make_signed { using type = int128::int128_t; }; template <> struct make_signed { using type = int128::int128_t; }; +#ifdef __NVCC__ + +template <> +__host__ __device__ constexpr int128::uint128_t get_max_value() +{ + return std::numeric_limits::max(); +} + +template <> +__host__ __device__ constexpr int128::int128_t get_max_value() +{ + return std::numeric_limits::max(); +} + +#endif // __NVCC__ + +#ifndef __NVCC__ + BOOST_INT128_INLINE_CONSTEXPR int128::uint128_t int128_pow10[39] = { int128::uint128_t{UINT64_C(0x0), UINT64_C(0x1)}, @@ -81,8 +99,57 @@ BOOST_INT128_INLINE_CONSTEXPR int128::uint128_t int128_pow10[39] = int128::uint128_t{UINT64_C(0x4b3b4ca85a86c47a), UINT64_C(0x98a224000000000)} }; -constexpr int num_digits(const int128::uint128_t& x) noexcept +#endif // __NVCC__ + +BOOST_INT128_HOST_DEVICE constexpr int num_digits(const int128::uint128_t& x) noexcept { + #ifdef __NVCC__ + + constexpr int128::uint128_t int128_pow10[39] = + { + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x1)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0xa)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x64)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x3e8)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x2710)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x186a0)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0xf4240)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x989680)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x5f5e100)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x3b9aca00)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x2540be400)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x174876e800)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0xe8d4a51000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x9184e72a000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x5af3107a4000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x38d7ea4c68000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x2386f26fc10000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x16345785d8a0000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0xde0b6b3a7640000)}, + int128::uint128_t{UINT64_C(0x0), UINT64_C(0x8ac7230489e80000)}, + int128::uint128_t{UINT64_C(0x5), UINT64_C(0x6bc75e2d63100000)}, + int128::uint128_t{UINT64_C(0x36), UINT64_C(0x35c9adc5dea00000)}, + int128::uint128_t{UINT64_C(0x21e), UINT64_C(0x19e0c9bab2400000)}, + int128::uint128_t{UINT64_C(0x152d), UINT64_C(0x2c7e14af6800000)}, + int128::uint128_t{UINT64_C(0xd3c2), UINT64_C(0x1bcecceda1000000)}, + int128::uint128_t{UINT64_C(0x84595), UINT64_C(0x161401484a000000)}, + int128::uint128_t{UINT64_C(0x52b7d2), UINT64_C(0xdcc80cd2e4000000)}, + int128::uint128_t{UINT64_C(0x33b2e3c), UINT64_C(0x9fd0803ce8000000)}, + int128::uint128_t{UINT64_C(0x204fce5e), UINT64_C(0x3e25026110000000)}, + int128::uint128_t{UINT64_C(0x1431e0fae), UINT64_C(0x6d7217caa0000000)}, + int128::uint128_t{UINT64_C(0xc9f2c9cd0), UINT64_C(0x4674edea40000000)}, + int128::uint128_t{UINT64_C(0x7e37be2022), UINT64_C(0xc0914b2680000000)}, + int128::uint128_t{UINT64_C(0x4ee2d6d415b), UINT64_C(0x85acef8100000000)}, + int128::uint128_t{UINT64_C(0x314dc6448d93), UINT64_C(0x38c15b0a00000000)}, + int128::uint128_t{UINT64_C(0x1ed09bead87c0), UINT64_C(0x378d8e6400000000)}, + int128::uint128_t{UINT64_C(0x13426172c74d82), UINT64_C(0x2b878fe800000000)}, + int128::uint128_t{UINT64_C(0xc097ce7bc90715), UINT64_C(0xb34b9f1000000000)}, + int128::uint128_t{UINT64_C(0x785ee10d5da46d9), UINT64_C(0xf436a000000000)}, + int128::uint128_t{UINT64_C(0x4b3b4ca85a86c47a), UINT64_C(0x98a224000000000)} + }; + + #endif // __NVCC__ + if (x.high == UINT64_C(0)) { return num_digits(x.low); @@ -91,7 +158,7 @@ constexpr int num_digits(const int128::uint128_t& x) noexcept // Use the most significant bit position to approximate log10 // log10(x) ~= log2(x) / log2(10) ~= log2(x) / 3.32 - const auto msb {64 + (63 - int128::detail::impl::countl_impl(x.high))}; + const auto msb {64 + (63 - int128::detail::countl_zero(x.high))}; // Approximate log10 const auto estimated_digits {(msb * 1000) / 3322 + 1}; @@ -112,27 +179,35 @@ constexpr int num_digits(const int128::uint128_t& x) noexcept } // namespace detail -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, const int128::uint128_t value, const int base = 10) noexcept +BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, const int128::uint128_t value, const int base = 10) noexcept { + #ifndef __NVCC__ + if (base == 10) { return detail::to_chars_128integer_impl(first, last, value); } + #endif // __NVCC__ + return detail::to_chars_integer_impl(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, const int128::int128_t value, const int base = 10) noexcept +BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, const int128::int128_t value, const int base = 10) noexcept { + #ifndef __NVCC__ + if (base == 10) { return detail::to_chars_128integer_impl(first, last, value); } + #endif // __NVCC__ + return detail::to_chars_integer_impl(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int128::uint128_t& value, const int base = 10) noexcept +BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int128::uint128_t& value, const int base = 10) noexcept { return detail::from_chars_integer_impl(first, last, value, base); } @@ -142,7 +217,7 @@ BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(core::string_view sv, return detail::from_chars_integer_impl(sv.data(), sv.data() + sv.size(), value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int128::int128_t& value, const int base = 10) noexcept +BOOST_INT128_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int128::int128_t& value, const int base = 10) noexcept { return detail::from_chars_integer_impl(first, last, value, base); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f2fbf41e..809715e6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -19,9 +19,8 @@ if(HAVE_BOOST_TEST) enable_testing() - boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::int128 Boost::random ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) - - + boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::int128 Boost::random Boost::charconv ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) + else() boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::int128 Boost::core Boost::random Boost::multiprecision Boost::mp11 Boost::charconv) diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 82e2c8ee..dab3ff9d 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -79,3 +79,13 @@ run test_unsigned_lcm.cu ; run test_signed_lcm.cu ; run test_unsigned_midpoint.cu ; run test_signed_midpoint.cu ; + +run test_unsigned_to_chars.cu ; +run test_signed_to_chars.cu ; +run test_unsigned_from_chars.cu ; +run test_signed_from_chars.cu ; + +run test_unsigned_to_chars_bases.cu ; +run test_signed_to_chars_bases.cu ; +run test_unsigned_from_chars_bases.cu ; +run test_signed_from_chars_bases.cu ; diff --git a/test/test_signed_from_chars.cu b/test/test_signed_from_chars.cu new file mode 100644 index 00000000..66d67eb0 --- /dev/null +++ b/test/test_signed_from_chars.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +constexpr int BUF_SIZE = 64; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors by generating random values and converting to strings + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::vector expected(numElements); + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = dist(rng); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type cpu_val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], cpu_val); + + if (output_vector[i] != cpu_val) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_from_chars_bases.cu b/test/test_signed_from_chars_bases.cu new file mode 100644 index 00000000..69b175fb --- /dev/null +++ b/test/test_signed_from_chars_bases.cu @@ -0,0 +1,125 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +constexpr int BUF_SIZE = 192; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val, base); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::vector expected(numElements); + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = dist(rng); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], val, base); + results.push_back(val); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_to_chars.cu b/test/test_signed_to_chars.cu new file mode 100644 index 00000000..20a6a944 --- /dev/null +++ b/test/test_signed_to_chars.cu @@ -0,0 +1,110 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +constexpr int BUF_SIZE = 64; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_to_chars_bases.cu b/test/test_signed_to_chars_bases.cu new file mode 100644 index 00000000..15733649 --- /dev/null +++ b/test/test_signed_to_chars_bases.cu @@ -0,0 +1,117 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::int128_t; + +constexpr int BUF_SIZE = 192; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_from_chars.cu b/test/test_unsigned_from_chars.cu new file mode 100644 index 00000000..727dcfa9 --- /dev/null +++ b/test/test_unsigned_from_chars.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +constexpr int BUF_SIZE = 64; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors by generating random values and converting to strings + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + std::vector expected(numElements); + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = dist(rng); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type cpu_val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], cpu_val); + + if (output_vector[i] != cpu_val) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_from_chars_bases.cu b/test/test_unsigned_from_chars_bases.cu new file mode 100644 index 00000000..514e4cdc --- /dev/null +++ b/test/test_unsigned_from_chars_bases.cu @@ -0,0 +1,125 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +constexpr int BUF_SIZE = 192; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val, base); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vectors + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + std::vector expected(numElements); + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = dist(rng); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], val, base); + results.push_back(val); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_to_chars.cu b/test/test_unsigned_to_chars.cu new file mode 100644 index 00000000..25d4252a --- /dev/null +++ b/test/test_unsigned_to_chars.cu @@ -0,0 +1,110 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +constexpr int BUF_SIZE = 64; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_to_chars_bases.cu b/test/test_unsigned_to_chars_bases.cu new file mode 100644 index 00000000..2a4545a2 --- /dev/null +++ b/test/test_unsigned_to_chars_bases.cu @@ -0,0 +1,117 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = boost::int128::uint128_t; + +constexpr int BUF_SIZE = 192; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +}