From 8e2ad303c92a33e6b6d8198fa54c19f7ea215515 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 15:36:44 -0500 Subject: [PATCH 1/8] Add from_chars for integers CUDA support --- include/boost/charconv/detail/config.hpp | 5 ++ .../detail/from_chars_integer_impl.hpp | 77 ++++++++++++++++++- .../charconv/detail/from_chars_result.hpp | 7 +- include/boost/charconv/from_chars.hpp | 24 +++--- 4 files changed, 95 insertions(+), 18 deletions(-) diff --git a/include/boost/charconv/detail/config.hpp b/include/boost/charconv/detail/config.hpp index 9feb609f..0badc50b 100644 --- a/include/boost/charconv/detail/config.hpp +++ b/include/boost/charconv/detail/config.hpp @@ -201,5 +201,10 @@ static_assert(std::is_same::value, "__float128 should b #endif +#ifdef __NVCC__ +# define BOOST_CHARCONV_HOST_DEVICE __host__ __device__ +#else +# define BOOST_CHARCONV_HOST_DEVICE +#endif #endif // BOOST_CHARCONV_DETAIL_CONFIG_HPP diff --git a/include/boost/charconv/detail/from_chars_integer_impl.hpp b/include/boost/charconv/detail/from_chars_integer_impl.hpp index 94d53af6..fd94f286 100644 --- a/include/boost/charconv/detail/from_chars_integer_impl.hpp +++ b/include/boost/charconv/detail/from_chars_integer_impl.hpp @@ -22,6 +22,8 @@ namespace boost { namespace charconv { namespace detail { +#ifndef __NVCC__ + static constexpr unsigned char uchar_values[] = {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, @@ -83,9 +85,33 @@ static constexpr double log_2_table[] = 0.193426403617 }; +#endif // __NVCC__ + // Convert characters for 0-9, A-Z, a-z to 0-35. 
Anything else is 255 -constexpr unsigned char digit_from_char(char val) noexcept +BOOST_CHARCONV_HOST_DEVICE constexpr unsigned char digit_from_char(const char val) noexcept { + #ifdef __NVCC__ + + constexpr unsigned char uchar_values[] = + {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; + + #endif // __NVCC__ + return uchar_values[static_cast(val)]; } @@ -112,8 +138,53 @@ constexpr unsigned char digit_from_char(char val) noexcept #endif template -BOOST_CXX14_CONSTEXPR from_chars_result from_chars_integer_impl(const char* first, const char* last, Integer& value, int base) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CXX14_CONSTEXPR from_chars_result from_chars_integer_impl(const char* first, const char* last, Integer& value, int base) noexcept { + #ifdef __NVCC__ + + constexpr double log_2_table[] 
= + { + 0.0, + 0.0, + 1.0, + 0.630929753571, + 0.5, + 0.430676558073, + 0.386852807235, + 0.356207187108, + 0.333333333333, + 0.315464876786, + 0.301029995664, + 0.289064826318, + 0.278942945651, + 0.270238154427, + 0.262649535037, + 0.255958024810, + 0.25, + 0.244650542118, + 0.239812466568, + 0.235408913367, + 0.231378213160, + 0.227670248697, + 0.224243824218, + 0.221064729458, + 0.218104291986, + 0.215338279037, + 0.212746053553, + 0.210309917857, + 0.208014597677, + 0.205846832460, + 0.203795047091, + 0.201849086582, + 0.2, + 0.198239863171, + 0.196561632233, + 0.194959021894, + 0.193426403617 + }; + + #endif // __NVCC__ + Unsigned_Integer result = 0; Unsigned_Integer overflow_value = 0; Unsigned_Integer max_digit = 0; @@ -310,7 +381,7 @@ BOOST_CXX14_CONSTEXPR from_chars_result from_chars_integer_impl(const char* firs // Only from_chars for integer types is constexpr (as of C++23) template -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, Integer& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, Integer& value, int base = 10) noexcept { using Unsigned_Integer = typename std::make_unsigned::type; return detail::from_chars_integer_impl(first, last, value, base); diff --git a/include/boost/charconv/detail/from_chars_result.hpp b/include/boost/charconv/detail/from_chars_result.hpp index e4302cfa..38404dc3 100644 --- a/include/boost/charconv/detail/from_chars_result.hpp +++ b/include/boost/charconv/detail/from_chars_result.hpp @@ -5,6 +5,7 @@ #ifndef BOOST_CHARCONV_DETAIL_FROM_CHARS_RESULT_HPP #define BOOST_CHARCONV_DETAIL_FROM_CHARS_RESULT_HPP +#include #include namespace boost { namespace charconv { @@ -22,17 +23,17 @@ struct from_chars_result_t // ERANGE = result_out_of_range std::errc ec; - friend constexpr bool operator==(const from_chars_result_t& lhs, const from_chars_result_t& rhs) noexcept + 
BOOST_CHARCONV_HOST_DEVICE friend constexpr bool operator==(const from_chars_result_t& lhs, const from_chars_result_t& rhs) noexcept { return lhs.ptr == rhs.ptr && lhs.ec == rhs.ec; } - friend constexpr bool operator!=(const from_chars_result_t& lhs, const from_chars_result_t& rhs) noexcept + BOOST_CHARCONV_HOST_DEVICE friend constexpr bool operator!=(const from_chars_result_t& lhs, const from_chars_result_t& rhs) noexcept { return !(lhs == rhs); // NOLINT : Expression can not be simplified since this is the definition } - constexpr explicit operator bool() const noexcept { return ec == std::errc{}; } + BOOST_CHARCONV_HOST_DEVICE constexpr explicit operator bool() const noexcept { return ec == std::errc{}; } }; using from_chars_result = from_chars_result_t; diff --git a/include/boost/charconv/from_chars.hpp b/include/boost/charconv/from_chars.hpp index 0bef0b5f..84a62fd5 100644 --- a/include/boost/charconv/from_chars.hpp +++ b/include/boost/charconv/from_chars.hpp @@ -19,48 +19,48 @@ namespace boost { namespace charconv { // integer overloads -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, bool& value, int base = 10) noexcept = delete; -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, char& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, bool& value, int base = 10) noexcept = delete; +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, char& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, signed char& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, signed 
char& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned char& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned char& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, short& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, short& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned short& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned short& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned int& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned int& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* 
last, long& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, long& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned long& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned long& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, long long& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, long long& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } -BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned long long& value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned long long& value, int base = 10) noexcept { return detail::from_chars(first, last, value, base); } From e649c46e3ae2dd3556bca78ea51b8744462c08a5 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 16:06:55 -0500 Subject: [PATCH 2/8] Add CUDA cmake CI runner --- .github/workflows/ci.yml | 59 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1490bac7..98db6edc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -765,3 +765,62 @@ jobs: - name: Run tests run: ci/build.sh + + + cuda-cmake-test: + strategy: + fail-fast: false + + runs-on: gpu-runner-1 + + steps: + - uses: 
Jimver/cuda-toolkit@v0.2.25 + id: cuda-toolkit + with: + cuda: '12.8.0' + method: 'network' + sub-packages: '["nvcc"]' + + - name: Output CUDA information + run: | + echo "Installed cuda version is: ${{steps.cuda-toolkit.outputs.cuda}}"+ + echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}" + nvcc -V + - uses: actions/checkout@v4 + + - name: Install Packages + run: | + sudo apt-get install -y cmake make + - name: Setup Boost + run: | + echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY + LIBRARY=${GITHUB_REPOSITORY#*/} + echo LIBRARY: $LIBRARY + echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV + echo GITHUB_BASE_REF: $GITHUB_BASE_REF + echo GITHUB_REF: $GITHUB_REF + REF=${GITHUB_BASE_REF:-$GITHUB_REF} + REF=${REF#refs/heads/} + echo REF: $REF + BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true + echo BOOST_BRANCH: $BOOST_BRANCH + cd .. + git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root + cd boost-root + mkdir -p libs/$LIBRARY + cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY + git submodule update --init tools/boostdep + python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY + - name: Configure + run: | + cd ../boost-root + mkdir __build__ && cd __build__ + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_CHARCONV_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 -DCMAKE_CUDA_STANDARD=17 .. + - name: Build tests + run: | + cd ../boost-root/__build__ + cmake --build . 
--target tests -j $(nproc) + - name: Run tests + run: | + cd ../boost-root/__build__ + ctest --output-on-failure --no-tests=error From 833c656c8dabe119dff81d7c756374cec4c3d003 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 16:07:12 -0500 Subject: [PATCH 3/8] Add CUDA testing infrastructure --- test/cuda_managed_ptr.hpp | 139 ++++++++++++++++++++++++++++++++++++++ test/stopwatch.hpp | 39 +++++++++++ 2 files changed, 178 insertions(+) create mode 100644 test/cuda_managed_ptr.hpp create mode 100644 test/stopwatch.hpp diff --git a/test/cuda_managed_ptr.hpp b/test/cuda_managed_ptr.hpp new file mode 100644 index 00000000..b0eb866f --- /dev/null +++ b/test/cuda_managed_ptr.hpp @@ -0,0 +1,139 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_CHARCONV_CUDA_MANAGED_PTR_HPP +#define BOOST_CHARCONV_CUDA_MANAGED_PTR_HPP + +#ifdef _MSC_VER +#pragma once +#endif + +#include + +class managed_holder_base +{ +protected: + static int count; + managed_holder_base() { ++count; } + ~managed_holder_base() + { + if(0 == --count) + cudaDeviceSynchronize(); + } +}; + +int managed_holder_base::count = 0; + +// +// Reset the device and exit: +// cudaDeviceReset causes the driver to clean up all state. While +// not mandatory in normal operation, it is good practice. It is also +// needed to ensure correct operation when the application is being +// profiled. Calling cudaDeviceReset causes all profile data to be +// flushed before the application exits. +// +// We have a global instance of this class, plus instances for each +// managed pointer. Last one out the door switches the lights off. 
+// +class cudaResetter +{ + static int count; +public: + cudaResetter() { ++count; } + ~cudaResetter() + { + if(--count == 0) + { + cudaError_t err = cudaDeviceReset(); + if(err != cudaSuccess) + { + std::cerr << "Failed to deinitialize the device! error=" << cudaGetErrorString(err) << std::endl; + } + } + } +}; + +int cudaResetter::count = 0; + +cudaResetter global_resetter; + +template +class cuda_managed_ptr +{ + T* data; + static const cudaResetter resetter; + cuda_managed_ptr(const cuda_managed_ptr&) = delete; + cuda_managed_ptr& operator=(cuda_managed_ptr const&) = delete; + void free() + { + if(data) + { + cudaDeviceSynchronize(); + cudaError_t err = cudaFree(data); + if(err != cudaSuccess) + { + std::cerr << "Failed to deinitialize the device! error=" << cudaGetErrorString(err) << std::endl; + } + } + } +public: + cuda_managed_ptr() : data(0) {} + cuda_managed_ptr(std::size_t n) + { + cudaError_t err = cudaSuccess; + void *ptr; + err = cudaMallocManaged(&ptr, n * sizeof(T)); + if(err != cudaSuccess) + throw std::runtime_error(cudaGetErrorString(err)); + cudaDeviceSynchronize(); + data = static_cast(ptr); + } + cuda_managed_ptr(cuda_managed_ptr&& o) + { + data = o.data; + o.data = 0; + } + cuda_managed_ptr& operator=(cuda_managed_ptr&& o) + { + free(); + data = o.data; + o.data = 0; + return *this; + } + ~cuda_managed_ptr() + { + free(); + } + + class managed_holder : managed_holder_base + { + T* pdata; + public: + managed_holder(T* p) : managed_holder_base(), pdata(p) {} + managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {} + operator T* () { return pdata; } + T& operator[] (std::size_t n) { return pdata[n]; } + }; + class const_managed_holder : managed_holder_base + { + const T* pdata; + public: + const_managed_holder(T* p) : managed_holder_base(), pdata(p) {} + const_managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {} + operator const T* () { return pdata; } + const T& operator[] (std::size_t 
n) { return pdata[n]; } + }; + + managed_holder get() { return managed_holder(data); } + const_managed_holder get()const { return data; } + T& operator[](std::size_t n) { return data[n]; } + const T& operator[](std::size_t n)const { return data[n]; } +}; + +template +cudaResetter const cuda_managed_ptr::resetter; + +#endif // BOOST_CHARCONV_CUDA_MANAGED_PTR_HPP \ No newline at end of file diff --git a/test/stopwatch.hpp b/test/stopwatch.hpp new file mode 100644 index 00000000..ab9a91a7 --- /dev/null +++ b/test/stopwatch.hpp @@ -0,0 +1,39 @@ +// Copyright John Maddock 2016. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_CHARCONV_CUDA_STOPWATCH_HPP +#define BOOST_CHARCONV_CUDA_STOPWATCH_HPP + +#ifdef _MSC_VER +#pragma once +#endif + +#include + +template +struct stopwatch +{ + typedef typename Clock::duration duration; + stopwatch() + { + m_start = Clock::now(); + } + double elapsed() + { + duration t = Clock::now() - m_start; + return std::chrono::duration_cast>(t).count(); + } + void reset() + { + m_start = Clock::now(); + } + +private: + typename Clock::time_point m_start; +}; + +typedef stopwatch watch; + +#endif \ No newline at end of file From 29487c28e4a643f58d0faded002df2b04449a241 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 13 Mar 2026 16:07:26 -0500 Subject: [PATCH 4/8] Add CUDA tests and CML options --- test/CMakeLists.txt | 22 +++- test/cuda_jamfile | 22 ++++ test/test_from_chars_char.cu | 114 +++++++++++++++++++++ test/test_from_chars_int.cu | 114 +++++++++++++++++++++ test/test_from_chars_long.cu | 114 +++++++++++++++++++++ test/test_from_chars_long_long.cu | 114 +++++++++++++++++++++ test/test_from_chars_short.cu | 114 +++++++++++++++++++++ test/test_from_chars_signed_char.cu | 114 +++++++++++++++++++++ test/test_from_chars_unsigned_char.cu | 114 +++++++++++++++++++++ 
test/test_from_chars_unsigned_int.cu | 114 +++++++++++++++++++++ test/test_from_chars_unsigned_long.cu | 114 +++++++++++++++++++++ test/test_from_chars_unsigned_long_long.cu | 114 +++++++++++++++++++++ test/test_from_chars_unsigned_short.cu | 114 +++++++++++++++++++++ 13 files changed, 1295 insertions(+), 3 deletions(-) create mode 100644 test/cuda_jamfile create mode 100644 test/test_from_chars_char.cu create mode 100644 test/test_from_chars_int.cu create mode 100644 test/test_from_chars_long.cu create mode 100644 test/test_from_chars_long_long.cu create mode 100644 test/test_from_chars_short.cu create mode 100644 test/test_from_chars_signed_char.cu create mode 100644 test/test_from_chars_unsigned_char.cu create mode 100644 test/test_from_chars_unsigned_int.cu create mode 100644 test/test_from_chars_unsigned_long.cu create mode 100644 test/test_from_chars_unsigned_long_long.cu create mode 100644 test/test_from_chars_unsigned_short.cu diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7bf0e6e0..1b6afc06 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -6,8 +6,24 @@ include(BoostTestJamfile OPTIONAL RESULT_VARIABLE HAVE_BOOST_TEST) if(HAVE_BOOST_TEST) -# https://crascit.com/2015/03/28/enabling-cxx11-in-cmake/ -set(CMAKE_CXX_EXTENSIONS OFF) -boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::charconv Boost::core Boost::assert) + enable_testing() + + if(BOOST_CHARCONV_ENABLE_CUDA) + + message(STATUS "Building Boost.charconv with CUDA") + + find_package(CUDA REQUIRED) + enable_language(CUDA) + set(CMAKE_CUDA_EXTENSIONS OFF) + + boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::charconv Boost::core Boost::assert ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) + + else() + + # https://crascit.com/2015/03/28/enabling-cxx11-in-cmake/ + set(CMAKE_CXX_EXTENSIONS OFF) + boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::charconv Boost::core Boost::assert) + + endif() endif() diff --git a/test/cuda_jamfile b/test/cuda_jamfile 
new file mode 100644 index 00000000..d91cf413 --- /dev/null +++ b/test/cuda_jamfile @@ -0,0 +1,22 @@ +# Copyright 2024 Matt Borland +# Distributed under the Boost Software License, Version 1.0. +# https://www.boost.org/LICENSE_1_0.txt + +import testing ; +import ../../config/checks/config : requires ; + +project : requirements + [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] + ; + +run test_from_chars_char.cu ; +run test_from_chars_signed_char.cu ; +run test_from_chars_unsigned_char.cu ; +run test_from_chars_short.cu ; +run test_from_chars_unsigned_short.cu ; +run test_from_chars_int.cu ; +run test_from_chars_unsigned_int.cu ; +run test_from_chars_long.cu ; +run test_from_chars_unsigned_long.cu ; +run test_from_chars_long_long.cu ; +run test_from_chars_unsigned_long_long.cu ; diff --git a/test/test_from_chars_char.cu b/test/test_from_chars_char.cu new file mode 100644 index 00000000..9e2c6cfb --- /dev/null +++ b/test/test_from_chars_char.cu @@ -0,0 +1,114 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = char; + +constexpr int BUF_SIZE = 32; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::vector expected(numElements); + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = static_cast(dist(rng)); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + 
cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], val); + results.push_back(val); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_from_chars_int.cu b/test/test_from_chars_int.cu new file mode 100644 index 00000000..a17668b4 --- /dev/null +++ b/test/test_from_chars_int.cu @@ -0,0 +1,114 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = int; + +constexpr int BUF_SIZE = 32; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::vector expected(numElements); + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = dist(rng); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + 
cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], val); + results.push_back(val); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_from_chars_long.cu b/test/test_from_chars_long.cu new file mode 100644 index 00000000..e1ad27eb --- /dev/null +++ b/test/test_from_chars_long.cu @@ -0,0 +1,114 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = long; + +constexpr int BUF_SIZE = 32; + +__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + const char* str = in_strings + i * BUF_SIZE; + test_type val {}; + boost::charconv::from_chars(str, str + in_lengths[i], val); + out[i] = val; + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_strings(numElements * BUF_SIZE); + cuda_managed_ptr input_lengths(numElements); + + // Allocate the managed output vector + cuda_managed_ptr output_vector(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + std::vector expected(numElements); + for (std::size_t i = 0; i < numElements; ++i) + { + expected[i] = dist(rng); + char* buf = &input_strings[i * BUF_SIZE]; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]); + input_lengths[i] = static_cast(res.ptr - buf); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + 
cuda_test<<>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + test_type val {}; + const char* str = &input_strings[i * BUF_SIZE]; + boost::charconv::from_chars(str, str + input_lengths[i], val); + results.push_back(val); + } + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_from_chars_long_long.cu b/test/test_from_chars_long_long.cu new file mode 100644 index 00000000..ee9ec056 --- /dev/null +++ b/test/test_from_chars_long_long.cu @@ -0,0 +1,114 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// NOTE(review): header names, template arguments and the launch configuration
+// were missing from the reviewed copy; they are reconstructed from usage below.
+#include <boost/charconv.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = long long;
+
+// Bytes reserved per input string; string i starts at offset i * BUF_SIZE.
+constexpr int BUF_SIZE = 32;
+
+/**
+ * Parses one string per thread with boost::charconv::from_chars.
+ * Thread i reads in_lengths[i] characters at in_strings + i * BUF_SIZE and
+ * stores the parsed value in out[i]; threads with i >= numElements do nothing.
+ */
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: formats random values with to_chars, parses them back on
+ * the device and on the host, and requires both to reproduce the originals.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Number of values to convert
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Managed buffers: input strings, their lengths, and the device results
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Format each random value into its own BUF_SIZE-byte slot
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        const auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    const cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, timing the loop for comparison
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    const double t = w.elapsed();
+
+    // Device and host must agree with each other and with the original values
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    return 0;
+}
diff --git a/test/test_from_chars_short.cu b/test/test_from_chars_short.cu
new file mode 100644
index 00000000..8cdec393
--- /dev/null
+++ b/test/test_from_chars_short.cu
@@ -0,0 +1,114 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// NOTE(review): header names, template arguments and the launch configuration
+// were missing from the reviewed copy; they are reconstructed from usage below.
+#include <boost/charconv.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = short;
+
+// Bytes reserved per input string; string i starts at offset i * BUF_SIZE.
+constexpr int BUF_SIZE = 32;
+
+/**
+ * Parses one string per thread with boost::charconv::from_chars.
+ * Thread i reads in_lengths[i] characters at in_strings + i * BUF_SIZE and
+ * stores the parsed value in out[i]; threads with i >= numElements do nothing.
+ */
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: formats random values with to_chars, parses them back on
+ * the device and on the host, and requires both to reproduce the originals.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Number of values to convert
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Managed buffers: input strings, their lengths, and the device results
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Format each random value into its own BUF_SIZE-byte slot
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        const auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    const cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, timing the loop for comparison
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    const double t = w.elapsed();
+
+    // Device and host must agree with each other and with the original values
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    return 0;
+}
diff --git a/test/test_from_chars_signed_char.cu b/test/test_from_chars_signed_char.cu
new file mode 100644
index 00000000..df064971
--- /dev/null
+++ b/test/test_from_chars_signed_char.cu
@@ -0,0 +1,114 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// NOTE(review): header names, template arguments and the launch configuration
+// were missing from the reviewed copy; they are reconstructed from usage below.
+#include <boost/charconv.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = signed char;
+
+// Bytes reserved per input string; string i starts at offset i * BUF_SIZE.
+constexpr int BUF_SIZE = 32;
+
+/**
+ * Parses one string per thread with boost::charconv::from_chars.
+ * Thread i reads in_lengths[i] characters at in_strings + i * BUF_SIZE and
+ * stores the parsed value in out[i]; threads with i >= numElements do nothing.
+ */
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: formats random values with to_chars, parses them back on
+ * the device and on the host, and requires both to reproduce the originals.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Number of values to convert
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Managed buffers: input strings, their lengths, and the device results
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Draw as int and narrow (uniform_int_distribution is not specified for character types), then format
+    std::uniform_int_distribution<int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = static_cast<test_type>(dist(rng));
+        char* buf = &input_strings[i * BUF_SIZE];
+        const auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    const cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, timing the loop for comparison
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    const double t = w.elapsed();
+
+    // Device and host must agree with each other and with the original values
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    return 0;
+}
diff --git a/test/test_from_chars_unsigned_char.cu b/test/test_from_chars_unsigned_char.cu
new file mode 100644
index 00000000..3ec966a0
--- /dev/null
+++ b/test/test_from_chars_unsigned_char.cu
@@ -0,0 +1,114 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// NOTE(review): header names, template arguments and the launch configuration
+// were missing from the reviewed copy; they are reconstructed from usage below.
+#include <boost/charconv.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned char;
+
+// Bytes reserved per input string; string i starts at offset i * BUF_SIZE.
+constexpr int BUF_SIZE = 32;
+
+/**
+ * Parses one string per thread with boost::charconv::from_chars.
+ * Thread i reads in_lengths[i] characters at in_strings + i * BUF_SIZE and
+ * stores the parsed value in out[i]; threads with i >= numElements do nothing.
+ */
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: formats random values with to_chars, parses them back on
+ * the device and on the host, and requires both to reproduce the originals.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Number of values to convert
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Managed buffers: input strings, their lengths, and the device results
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Draw as int and narrow (uniform_int_distribution is not specified for character types), then format
+    std::uniform_int_distribution<int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = static_cast<test_type>(dist(rng));
+        char* buf = &input_strings[i * BUF_SIZE];
+        const auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    const cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, timing the loop for comparison
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    const double t = w.elapsed();
+
+    // Device and host must agree with each other and with the original values
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    return 0;
+}
diff --git a/test/test_from_chars_unsigned_int.cu b/test/test_from_chars_unsigned_int.cu
new file mode 100644
index 00000000..40ce8f81
--- /dev/null
+++ b/test/test_from_chars_unsigned_int.cu
@@ -0,0 +1,114 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// NOTE(review): header names, template arguments and the launch configuration
+// were missing from the reviewed copy; they are reconstructed from usage below.
+#include <boost/charconv.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned int;
+
+// Bytes reserved per input string; string i starts at offset i * BUF_SIZE.
+constexpr int BUF_SIZE = 32;
+
+/**
+ * Parses one string per thread with boost::charconv::from_chars.
+ * Thread i reads in_lengths[i] characters at in_strings + i * BUF_SIZE and
+ * stores the parsed value in out[i]; threads with i >= numElements do nothing.
+ */
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: formats random values with to_chars, parses them back on
+ * the device and on the host, and requires both to reproduce the originals.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Number of values to convert
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Managed buffers: input strings, their lengths, and the device results
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Format each random value into its own BUF_SIZE-byte slot
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        const auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    const cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, timing the loop for comparison
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    const double t = w.elapsed();
+
+    // Device and host must agree with each other and with the original values
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    return 0;
+}
diff --git a/test/test_from_chars_unsigned_long.cu b/test/test_from_chars_unsigned_long.cu
new file mode 100644
index 00000000..d2f773e6
--- /dev/null
+++ b/test/test_from_chars_unsigned_long.cu
@@ -0,0 +1,114 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// NOTE(review): header names, template arguments and the launch configuration
+// were missing from the reviewed copy; they are reconstructed from usage below.
+#include <boost/charconv.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned long;
+
+// Bytes reserved per input string; string i starts at offset i * BUF_SIZE.
+constexpr int BUF_SIZE = 32;
+
+/**
+ * Parses one string per thread with boost::charconv::from_chars.
+ * Thread i reads in_lengths[i] characters at in_strings + i * BUF_SIZE and
+ * stores the parsed value in out[i]; threads with i >= numElements do nothing.
+ */
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: formats random values with to_chars, parses them back on
+ * the device and on the host, and requires both to reproduce the originals.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Number of values to convert
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Managed buffers: input strings, their lengths, and the device results
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Format each random value into its own BUF_SIZE-byte slot
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        const auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    const cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, timing the loop for comparison
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    const double t = w.elapsed();
+
+    // Device and host must agree with each other and with the original values
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    return 0;
+}
diff --git a/test/test_from_chars_unsigned_long_long.cu b/test/test_from_chars_unsigned_long_long.cu
new file mode 100644
index 00000000..009d4c7a
--- /dev/null
+++ b/test/test_from_chars_unsigned_long_long.cu
@@ -0,0 +1,114 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// NOTE(review): header names, template arguments and the launch configuration
+// were missing from the reviewed copy; they are reconstructed from usage below.
+#include <boost/charconv.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned long long;
+
+// Bytes reserved per input string; string i starts at offset i * BUF_SIZE.
+constexpr int BUF_SIZE = 32;
+
+/**
+ * Parses one string per thread with boost::charconv::from_chars.
+ * Thread i reads in_lengths[i] characters at in_strings + i * BUF_SIZE and
+ * stores the parsed value in out[i]; threads with i >= numElements do nothing.
+ */
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: formats random values with to_chars, parses them back on
+ * the device and on the host, and requires both to reproduce the originals.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Number of values to convert
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Managed buffers: input strings, their lengths, and the device results
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Format each random value into its own BUF_SIZE-byte slot
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        const auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    const cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, timing the loop for comparison
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    const double t = w.elapsed();
+
+    // Device and host must agree with each other and with the original values
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    return 0;
+}
diff --git a/test/test_from_chars_unsigned_short.cu b/test/test_from_chars_unsigned_short.cu
new file mode 100644
index 00000000..ae280a5a
--- /dev/null
+++ b/test/test_from_chars_unsigned_short.cu
@@ -0,0 +1,114 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// NOTE(review): header names, template arguments and the launch configuration
+// were missing from the reviewed copy; they are reconstructed from usage below.
+#include <boost/charconv.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned short;
+
+// Bytes reserved per input string; string i starts at offset i * BUF_SIZE.
+constexpr int BUF_SIZE = 32;
+
+/**
+ * Parses one string per thread with boost::charconv::from_chars.
+ * Thread i reads in_lengths[i] characters at in_strings + i * BUF_SIZE and
+ * stores the parsed value in out[i]; threads with i >= numElements do nothing.
+ */
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: formats random values with to_chars, parses them back on
+ * the device and on the host, and requires both to reproduce the originals.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Number of values to convert
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Managed buffers: input strings, their lengths, and the device results
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Format each random value into its own BUF_SIZE-byte slot
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        const auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    const cudaError_t err = cudaGetLastError();
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, timing the loop for comparison
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    const double t = w.elapsed();
+
+    // Device and host must agree with each other and with the original values
+    for (int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+    return 0;
+}
From ac110be7295d7c60f63b6d3d6e64964b037d7fd9 Mon Sep 17 00:00:00 2001
From: Matt Borland
Date: Fri, 13 Mar 2026 16:50:38 -0500
Subject: [PATCH 5/8] Add testing with bases from 2 to 36

---
 test/cuda_jamfile | 12 ++
 test/test_from_chars_bases_char.cu | 121 ++++++++++++++++++
 test/test_from_chars_bases_int.cu | 121 ++++++++++++++++++
 test/test_from_chars_bases_long.cu | 121 ++++++++++++++++++
 test/test_from_chars_bases_long_long.cu | 121 ++++++++++++++++++
 test/test_from_chars_bases_short.cu | 121 ++++++++++++++++++
 test/test_from_chars_bases_signed_char.cu | 121 ++++++++++++++++++
 test/test_from_chars_bases_unsigned_char.cu | 121 ++++++++++++++++++
 test/test_from_chars_bases_unsigned_int.cu | 121 ++++++++++++++++++
 test/test_from_chars_bases_unsigned_long.cu | 121 ++++++++++++++++++
 ...est_from_chars_bases_unsigned_long_long.cu | 121 ++++++++++++++++++
 test/test_from_chars_bases_unsigned_short.cu | 121
++++++++++++++++++ 12 files changed, 1343 insertions(+) create mode 100644 test/test_from_chars_bases_char.cu create mode 100644 test/test_from_chars_bases_int.cu create mode 100644 test/test_from_chars_bases_long.cu create mode 100644 test/test_from_chars_bases_long_long.cu create mode 100644 test/test_from_chars_bases_short.cu create mode 100644 test/test_from_chars_bases_signed_char.cu create mode 100644 test/test_from_chars_bases_unsigned_char.cu create mode 100644 test/test_from_chars_bases_unsigned_int.cu create mode 100644 test/test_from_chars_bases_unsigned_long.cu create mode 100644 test/test_from_chars_bases_unsigned_long_long.cu create mode 100644 test/test_from_chars_bases_unsigned_short.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index d91cf413..be30752e 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -20,3 +20,15 @@ run test_from_chars_long.cu ; run test_from_chars_unsigned_long.cu ; run test_from_chars_long_long.cu ; run test_from_chars_unsigned_long_long.cu ; + +run test_from_chars_bases_char.cu ; +run test_from_chars_bases_signed_char.cu ; +run test_from_chars_bases_unsigned_char.cu ; +run test_from_chars_bases_short.cu ; +run test_from_chars_bases_unsigned_short.cu ; +run test_from_chars_bases_int.cu ; +run test_from_chars_bases_unsigned_int.cu ; +run test_from_chars_bases_long.cu ; +run test_from_chars_bases_unsigned_long.cu ; +run test_from_chars_bases_long_long.cu ; +run test_from_chars_bases_unsigned_long_long.cu ; diff --git a/test/test_from_chars_bases_char.cu b/test/test_from_chars_bases_char.cu new file mode 100644 index 00000000..3a38a8d8 --- /dev/null +++ b/test/test_from_chars_bases_char.cu @@ -0,0 +1,121 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// NOTE(review): header names, template arguments and the launch configuration
+// were missing from the reviewed copy; they are reconstructed from usage below.
+#include <boost/charconv.hpp>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = char;
+
+// Bytes reserved per input string; string i starts at offset i * BUF_SIZE.
+constexpr int BUF_SIZE = 128;
+
+/**
+ * Parses one string per thread with boost::charconv::from_chars in the given base.
+ * Thread i reads in_lengths[i] characters at in_strings + i * BUF_SIZE and
+ * stores the parsed value in out[i]; threads with i >= numElements do nothing.
+ */
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements)
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: for each base from 2 to 36, formats random values with
+ * to_chars, parses them back on device and host, and checks both against the originals.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Number of values to convert per base
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Managed buffers: input strings, their lengths, and the device results
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+
+    std::uniform_int_distribution<int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Draw as int and narrow (uniform_int_distribution is not specified for character types), then format
+        for (int i = 0; i < numElements; ++i)
+        {
+            expected[i] = static_cast<test_type>(dist(rng));
+            char* buf = &input_strings[i * BUF_SIZE];
+            const auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        const cudaError_t err = cudaGetLastError();
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host, timing the loop for comparison
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for (int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        const double t = w.elapsed();
+
+        // Device and host must agree with each other and with the original values
+        for (int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || results[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+    return 0;
+}
diff --git a/test/test_from_chars_bases_int.cu b/test/test_from_chars_bases_int.cu
new file mode 100644
index 00000000..25e7d9c2
--- /dev/null
+++ b/test/test_from_chars_bases_int.cu
@@ -0,0 +1,121 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/charconv.hpp>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = int;
+
+constexpr int BUF_SIZE = 128; // bytes reserved for each input string
+// Kernel: thread i parses in_lengths[i] characters from slot i of in_strings into out[i].
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements) // surplus threads in the last block do nothing
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: for every base in [2, 36], writes random test_type values with to_chars, parses them on the device and on the host with from_chars, and fails on any mismatch.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-byte slot per string, plus each string's length
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host to get a reference result (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must match the host parse and the value originally written by to_chars
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_long.cu b/test/test_from_chars_bases_long.cu
new file mode 100644
index 00000000..5b50f1aa
--- /dev/null
+++ b/test/test_from_chars_bases_long.cu
@@ -0,0 +1,121 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/charconv.hpp>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = long;
+
+constexpr int BUF_SIZE = 128; // bytes reserved for each input string
+// Kernel: thread i parses in_lengths[i] characters from slot i of in_strings into out[i].
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements) // surplus threads in the last block do nothing
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: for every base in [2, 36], writes random test_type values with to_chars, parses them on the device and on the host with from_chars, and fails on any mismatch.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-byte slot per string, plus each string's length
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host to get a reference result (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must match the host parse and the value originally written by to_chars
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_long_long.cu b/test/test_from_chars_bases_long_long.cu
new file mode 100644
index 00000000..6926dc57
--- /dev/null
+++ b/test/test_from_chars_bases_long_long.cu
@@ -0,0 +1,121 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/charconv.hpp>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = long long;
+
+constexpr int BUF_SIZE = 128; // bytes reserved for each input string
+// Kernel: thread i parses in_lengths[i] characters from slot i of in_strings into out[i].
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements) // surplus threads in the last block do nothing
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: for every base in [2, 36], writes random test_type values with to_chars, parses them on the device and on the host with from_chars, and fails on any mismatch.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-byte slot per string, plus each string's length
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host to get a reference result (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must match the host parse and the value originally written by to_chars
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_short.cu b/test/test_from_chars_bases_short.cu
new file mode 100644
index 00000000..f9ee2852
--- /dev/null
+++ b/test/test_from_chars_bases_short.cu
@@ -0,0 +1,121 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/charconv.hpp>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = short;
+
+constexpr int BUF_SIZE = 128; // bytes reserved for each input string
+// Kernel: thread i parses in_lengths[i] characters from slot i of in_strings into out[i].
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements) // surplus threads in the last block do nothing
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: for every base in [2, 36], writes random test_type values with to_chars, parses them on the device and on the host with from_chars, and fails on any mismatch.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-byte slot per string, plus each string's length
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host to get a reference result (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must match the host parse and the value originally written by to_chars
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_signed_char.cu b/test/test_from_chars_bases_signed_char.cu
new file mode 100644
index 00000000..9df80852
--- /dev/null
+++ b/test/test_from_chars_bases_signed_char.cu
@@ -0,0 +1,121 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/charconv.hpp>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = signed char;
+
+constexpr int BUF_SIZE = 128; // bytes reserved for each input string
+// Kernel: thread i parses in_lengths[i] characters from slot i of in_strings into out[i].
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements) // surplus threads in the last block do nothing
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: for every base in [2, 36], writes random test_type values with to_chars, parses them on the device and on the host with from_chars, and fails on any mismatch.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-byte slot per string, plus each string's length
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = static_cast<test_type>(dist(rng));
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host to get a reference result (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must match the host parse and the value originally written by to_chars
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_unsigned_char.cu b/test/test_from_chars_bases_unsigned_char.cu
new file mode 100644
index 00000000..15dda9d2
--- /dev/null
+++ b/test/test_from_chars_bases_unsigned_char.cu
@@ -0,0 +1,121 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/charconv.hpp>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned char;
+
+constexpr int BUF_SIZE = 128; // bytes reserved for each input string
+// Kernel: thread i parses in_lengths[i] characters from slot i of in_strings into out[i].
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements) // surplus threads in the last block do nothing
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: for every base in [2, 36], writes random test_type values with to_chars, parses them on the device and on the host with from_chars, and fails on any mismatch.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-byte slot per string, plus each string's length
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = static_cast<test_type>(dist(rng));
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host to get a reference result (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must match the host parse and the value originally written by to_chars
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_unsigned_int.cu b/test/test_from_chars_bases_unsigned_int.cu
new file mode 100644
index 00000000..951137b4
--- /dev/null
+++ b/test/test_from_chars_bases_unsigned_int.cu
@@ -0,0 +1,121 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/charconv.hpp>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned int;
+
+constexpr int BUF_SIZE = 128; // bytes reserved for each input string
+// Kernel: thread i parses in_lengths[i] characters from slot i of in_strings into out[i].
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements) // surplus threads in the last block do nothing
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: for every base in [2, 36], writes random test_type values with to_chars, parses them on the device and on the host with from_chars, and fails on any mismatch.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-byte slot per string, plus each string's length
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host to get a reference result (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must match the host parse and the value originally written by to_chars
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_unsigned_long.cu b/test/test_from_chars_bases_unsigned_long.cu
new file mode 100644
index 00000000..8e5cc069
--- /dev/null
+++ b/test/test_from_chars_bases_unsigned_long.cu
@@ -0,0 +1,121 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/charconv.hpp>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned long;
+
+constexpr int BUF_SIZE = 128; // bytes reserved for each input string
+// Kernel: thread i parses in_lengths[i] characters from slot i of in_strings into out[i].
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements) // surplus threads in the last block do nothing
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: for every base in [2, 36], writes random test_type values with to_chars, parses them on the device and on the host with from_chars, and fails on any mismatch.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-byte slot per string, plus each string's length
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host to get a reference result (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must match the host parse and the value originally written by to_chars
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_unsigned_long_long.cu b/test/test_from_chars_bases_unsigned_long_long.cu
new file mode 100644
index 00000000..3c848d72
--- /dev/null
+++ b/test/test_from_chars_bases_unsigned_long_long.cu
@@ -0,0 +1,121 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <boost/charconv.hpp>
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned long long;
+
+constexpr int BUF_SIZE = 128; // bytes reserved for each input string
+// Kernel: thread i parses in_lengths[i] characters from slot i of in_strings into out[i].
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements) // surplus threads in the last block do nothing
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base);
+        out[i] = val;
+    }
+}
+
+/**
+ * Host main routine: for every base in [2, 36], writes random test_type values with to_chars, parses them on the device and on the host with from_chars, and fails on any mismatch.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-byte slot per string, plus each string's length
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host to get a reference result (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must match the host parse and the value originally written by to_chars
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_unsigned_short.cu b/test/test_from_chars_bases_unsigned_short.cu
new file mode 100644
index 00000000..b7e6649e
--- /dev/null
+++ b/test/test_from_chars_bases_unsigned_short.cu
@@ -0,0 +1,121 @@
+// Copyright Matt Borland 2024 - 2026.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0.
(See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// NOTE(review): the <...> header names in this file were lost in extraction; this list is rebuilt from the names used below - confirm against upstream.
+#include <cstdlib>
+#include <iostream>
+#include <limits>
+#include <random>
+#include <vector>
+#include <boost/charconv.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned short;
+constexpr int BUF_SIZE = 128; // Characters reserved for each string in the flat input buffer
+
+// Thread i parses the in_lengths[i] characters starting at in_strings + i * BUF_SIZE in the given base and stores the value in out[i].
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < numElements) // the block count is rounded up, so trailing threads may have no element
+    {
+        const char* str = in_strings + i * BUF_SIZE;
+        test_type val {};
+        boost::charconv::from_chars(str, str + in_lengths[i], val, base); // on failure val keeps its zero initial value
+        out[i] = val;
+    }
+}
+
+/**
+ * Host driver: for each base in [2, 36], writes random test_type values with to_chars on the host, parses them back with from_chars on the device and on the host, and compares. Returns EXIT_FAILURE on a CUDA error or any mismatch, 0 otherwise.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors: one BUF_SIZE-wide slot per string, plus each string's length
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+
+    std::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors: random value -> its text in this base, and the text length
+        for (int i = 0; i < numElements; ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize(); // wait for the kernel before reading the timer and the error state
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host for a reference result and a timing baseline
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must equal the host result, and both must round-trip to the generated value
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || results[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
From 9f4faa32c7a9d5d3db7e943fc61177c1be8f00c5 Mon Sep 17 00:00:00 2001
From: Matt Borland
Date: Mon, 16 Mar 2026 09:48:56 -0500
Subject: [PATCH 6/8] Disallow builtin 128-bit integers on device

---
 include/boost/charconv/detail/config.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/boost/charconv/detail/config.hpp b/include/boost/charconv/detail/config.hpp
index 0badc50b..59b71ba6 100644
--- a/include/boost/charconv/detail/config.hpp
+++ b/include/boost/charconv/detail/config.hpp
@@ -20,7 +20,7 @@
 #endif
 
 // Use 128-bit integers and suppress warnings for using extensions
-#if defined(BOOST_HAS_INT128)
+#if defined(BOOST_HAS_INT128) && !defined(__NVCC__)
 # define BOOST_CHARCONV_HAS_INT128
 # define BOOST_CHARCONV_INT128_MAX static_cast((static_cast(1) << 127) - 1)
 # define BOOST_CHARCONV_INT128_MIN (-BOOST_CHARCONV_INT128_MAX - 1)
From eb191f123291a56cd6c45ddb868dfe49ece722e4 Mon Sep 17 00:00:00 2001
From: Matt Borland
Date: Mon, 16 Mar 2026 10:42:38 -0500
Subject: [PATCH 7/8] Disable emulated128 path on device

---
 include/boost/charconv/detail/from_chars_integer_impl.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/boost/charconv/detail/from_chars_integer_impl.hpp b/include/boost/charconv/detail/from_chars_integer_impl.hpp
index fd94f286..56f11b75 100644
--- a/include/boost/charconv/detail/from_chars_integer_impl.hpp
+++ b/include/boost/charconv/detail/from_chars_integer_impl.hpp
@@ -396,10 +396,12 @@ BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars128(const char* first,
 }
 #endif
 
+#ifndef __NVCC__
 BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars128(const char* first, const char* last, uint128& value, int base = 10) noexcept
 {
     return from_chars_integer_impl(first, 
last, value, base);
 }
+#endif
 
 }}} // Namespaces
 
From 977fb1fbafe39964585a7da03e6ee8790f14d013 Mon Sep 17 00:00:00 2001
From: Matt Borland
Date: Mon, 16 Mar 2026 11:23:39 -0500
Subject: [PATCH 8/8] Replace uses of numeric limits on device

---
 .../detail/from_chars_integer_impl.hpp | 29 ++++++++++++++++---
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/include/boost/charconv/detail/from_chars_integer_impl.hpp b/include/boost/charconv/detail/from_chars_integer_impl.hpp
index 56f11b75..4c95cf3f 100644
--- a/include/boost/charconv/detail/from_chars_integer_impl.hpp
+++ b/include/boost/charconv/detail/from_chars_integer_impl.hpp
@@ -137,6 +137,27 @@ BOOST_CHARCONV_HOST_DEVICE constexpr unsigned char digit_from_char(const char va
 
 #endif
 
+#ifdef __NVCC__
+// Largest value of integer type T, computed from an all-ones bit pattern so that std::numeric_limits is not needed in device code.
+template <typename T>
+BOOST_CHARCONV_HOST_DEVICE constexpr T get_max_value() noexcept
+{
+    using UT = typename std::make_unsigned<T>::type;
+    return std::is_signed<T>::value
+        ? static_cast<T>(static_cast<UT>(-1) >> 1) // signed: all ones shifted right once, leaving the top bit clear
+        : static_cast<T>(static_cast<UT>(-1));     // unsigned: all ones
+}
+
+#else
+// Largest value of T; builds without NVCC simply defer to std::numeric_limits.
+template <typename T>
+constexpr T get_max_value() noexcept
+{
+    return (std::numeric_limits<T>::max)(); // parenthesised so a function-like max macro cannot expand here
+}
+
+#endif
+
 template
 BOOST_CHARCONV_HOST_DEVICE BOOST_CXX14_CONSTEXPR from_chars_result from_chars_integer_impl(const char* first, const char* last, Integer& value, int base) noexcept
 {
@@ -226,8 +247,8 @@ BOOST_CHARCONV_HOST_DEVICE BOOST_CXX14_CONSTEXPR from_chars_result from_chars_in
         else
         #endif
         {
-            overflow_value = static_cast((std::numeric_limits::max)());
-            max_digit = static_cast((std::numeric_limits::max)());
+            overflow_value = static_cast(get_max_value());
+            max_digit = static_cast(get_max_value());
         }
 
         if (is_negative)
@@ -252,8 +273,8 @@ BOOST_CHARCONV_HOST_DEVICE BOOST_CXX14_CONSTEXPR from_chars_result from_chars_in
         else
         #endif
         {
-            overflow_value = (std::numeric_limits::max)();
-            max_digit = (std::numeric_limits::max)();
+            overflow_value = get_max_value();
+            max_digit = get_max_value();
         }
     }
 