diff --git a/test/cuda_jamfile b/test/cuda_jamfile index cd2fcc13..bac4ab2b 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -61,3 +61,6 @@ run test_signed_ge.cu ; run test_unsigned_cstdlib_div.cu ; run test_signed_cstdlib_div.cu ; + +run test_signed_to_unsigned_conversion.cu ; +run test_unsigned_to_signed_conversion.cu ; diff --git a/test/test_signed_to_unsigned_conversion.cu b/test/test_signed_to_unsigned_conversion.cu new file mode 100644 index 00000000..5073f0f6 --- /dev/null +++ b/test/test_signed_to_unsigned_conversion.cu @@ -0,0 +1,110 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using signed_type = boost::int128::int128_t; +using unsigned_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + * Converts signed int128 values to unsigned int128 values + */ +__global__ void cuda_test(const signed_type *in, unsigned_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = unsigned_type(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors with non-negative values so bit patterns match + boost::random::uniform_int_distribution dist {signed_type{0}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(unsigned_type(input_vector[i])); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_to_signed_conversion.cu b/test/test_unsigned_to_signed_conversion.cu new file mode 100644 index 00000000..016e7a5d --- /dev/null +++ b/test/test_unsigned_to_signed_conversion.cu @@ -0,0 +1,110 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using signed_type = boost::int128::int128_t; +using unsigned_type = boost::int128::uint128_t; + +/** + * CUDA Kernel Device code + * + * Converts unsigned int128 values to signed int128 values + */ +__global__ void cuda_test(const unsigned_type *in, signed_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = signed_type(in[i]); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors with values that fit in signed range + boost::random::uniform_int_distribution dist {unsigned_type{0U}, static_cast((std::numeric_limits::max)())}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + results.push_back(signed_type(input_vector[i])); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +}