From 8e2ad303c92a33e6b6d8198fa54c19f7ea215515 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 13 Mar 2026 15:36:44 -0500
Subject: [PATCH 1/8] Add from_chars for integers CUDA support

---
 include/boost/charconv/detail/config.hpp      |  5 ++
 .../detail/from_chars_integer_impl.hpp        | 77 ++++++++++++++++++-
 .../charconv/detail/from_chars_result.hpp     |  7 +-
 include/boost/charconv/from_chars.hpp         | 24 +++---
 4 files changed, 95 insertions(+), 18 deletions(-)
diff --git a/include/boost/charconv/detail/config.hpp b/include/boost/charconv/detail/config.hpp
index 9feb609f..0badc50b 100644
--- a/include/boost/charconv/detail/config.hpp
+++ b/include/boost/charconv/detail/config.hpp
@@ -201,5 +201,10 @@ static_assert(std::is_same<long double, __float128>::value, "__float128 should b
 
 #endif
 
+#ifdef __NVCC__ // Building with nvcc: annotated functions are compiled for both host and device
+#  define BOOST_CHARCONV_HOST_DEVICE __host__ __device__
+#else // Any other compiler: the annotation expands to nothing
+#  define BOOST_CHARCONV_HOST_DEVICE
+#endif
 
 #endif // BOOST_CHARCONV_DETAIL_CONFIG_HPP
diff --git a/include/boost/charconv/detail/from_chars_integer_impl.hpp b/include/boost/charconv/detail/from_chars_integer_impl.hpp
index 94d53af6..fd94f286 100644
--- a/include/boost/charconv/detail/from_chars_integer_impl.hpp
+++ b/include/boost/charconv/detail/from_chars_integer_impl.hpp
@@ -22,6 +22,8 @@
 
 namespace boost { namespace charconv { namespace detail {
 
+#ifndef __NVCC__
+
 static constexpr unsigned char uchar_values[] =
      {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
       255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
@@ -83,9 +85,33 @@ static constexpr double log_2_table[] =
     0.193426403617
 };
 
+#endif // __NVCC__
+
 // Convert characters for 0-9, A-Z, a-z to 0-35. Anything else is 255
-constexpr unsigned char digit_from_char(char val) noexcept
+BOOST_CHARCONV_HOST_DEVICE constexpr unsigned char digit_from_char(const char val) noexcept
 {
+    #ifdef __NVCC__ // The namespace-scope table is compiled out under nvcc, so a function-local copy is used
+
+    constexpr unsigned char uchar_values[] =
+    {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+     255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+     255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+       0,   1,   2,   3,   4,   5,   6,   7,   8,   9, 255, 255, 255, 255, 255, 255,
+     255,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,
+      25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35, 255, 255, 255, 255, 255,
+     255,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,
+      25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35, 255, 255, 255, 255, 255,
+     255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+     255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+     255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+     255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+     255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+     255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+     255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+     255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255};
+
+    #endif // __NVCC__
+
     return uchar_values[static_cast<unsigned char>(val)];
 }
 
@@ -112,8 +138,53 @@ constexpr unsigned char digit_from_char(char val) noexcept
 #endif
 
 template <typename Integer, typename Unsigned_Integer>
-BOOST_CXX14_CONSTEXPR from_chars_result from_chars_integer_impl(const char* first, const char* last, Integer& value, int base) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CXX14_CONSTEXPR from_chars_result from_chars_integer_impl(const char* first, const char* last, Integer& value, int base) noexcept
 {
+    #ifdef __NVCC__
+
+    constexpr double log_2_table[] =
+    {
+        0.0,
+        0.0,
+        1.0,
+        0.630929753571,
+        0.5,
+        0.430676558073,
+        0.386852807235,
+        0.356207187108,
+        0.333333333333,
+        0.315464876786,
+        0.301029995664,
+        0.289064826318,
+        0.278942945651,
+        0.270238154427,
+        0.262649535037,
+        0.255958024810,
+        0.25,
+        0.244650542118,
+        0.239812466568,
+        0.235408913367,
+        0.231378213160,
+        0.227670248697,
+        0.224243824218,
+        0.221064729458,
+        0.218104291986,
+        0.215338279037,
+        0.212746053553,
+        0.210309917857,
+        0.208014597677,
+        0.205846832460,
+        0.203795047091,
+        0.201849086582,
+        0.2,
+        0.198239863171,
+        0.196561632233,
+        0.194959021894,
+        0.193426403617
+    };
+
+    #endif // __NVCC__
+
     Unsigned_Integer result = 0;
     Unsigned_Integer overflow_value = 0;
     Unsigned_Integer max_digit = 0;
@@ -310,7 +381,7 @@ BOOST_CXX14_CONSTEXPR from_chars_result from_chars_integer_impl(const char* firs
 
 // Only from_chars for integer types is constexpr (as of C++23)
 template <typename Integer>
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, Integer& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, Integer& value, int base = 10) noexcept
 {
     using Unsigned_Integer = typename std::make_unsigned<Integer>::type;
     return detail::from_chars_integer_impl<Integer, Unsigned_Integer>(first, last, value, base);
diff --git a/include/boost/charconv/detail/from_chars_result.hpp b/include/boost/charconv/detail/from_chars_result.hpp
index e4302cfa..38404dc3 100644
--- a/include/boost/charconv/detail/from_chars_result.hpp
+++ b/include/boost/charconv/detail/from_chars_result.hpp
@@ -5,6 +5,7 @@
 #ifndef BOOST_CHARCONV_DETAIL_FROM_CHARS_RESULT_HPP
 #define BOOST_CHARCONV_DETAIL_FROM_CHARS_RESULT_HPP
 
+#include <boost/charconv/detail/config.hpp>
 #include <system_error>
 
 namespace boost { namespace charconv {
@@ -22,17 +23,17 @@ struct from_chars_result_t
     // ERANGE = result_out_of_range
     std::errc ec;
 
-    friend constexpr bool operator==(const from_chars_result_t<UC>& lhs, const from_chars_result_t<UC>& rhs) noexcept
+    BOOST_CHARCONV_HOST_DEVICE friend constexpr bool operator==(const from_chars_result_t<UC>& lhs, const from_chars_result_t<UC>& rhs) noexcept
     {
         return lhs.ptr == rhs.ptr && lhs.ec == rhs.ec;
     }
 
-    friend constexpr bool operator!=(const from_chars_result_t<UC>& lhs, const from_chars_result_t<UC>& rhs) noexcept
+    BOOST_CHARCONV_HOST_DEVICE friend constexpr bool operator!=(const from_chars_result_t<UC>& lhs, const from_chars_result_t<UC>& rhs) noexcept
     {
         return !(lhs == rhs); // NOLINT : Expression can not be simplified since this is the definition
     }
 
-    constexpr explicit operator bool() const noexcept { return ec == std::errc{}; }
+    BOOST_CHARCONV_HOST_DEVICE constexpr explicit operator bool() const noexcept { return ec == std::errc{}; }
 };
 using from_chars_result = from_chars_result_t<char>;
 
diff --git a/include/boost/charconv/from_chars.hpp b/include/boost/charconv/from_chars.hpp
index 0bef0b5f..84a62fd5 100644
--- a/include/boost/charconv/from_chars.hpp
+++ b/include/boost/charconv/from_chars.hpp
@@ -19,48 +19,48 @@ namespace boost { namespace charconv {
 
 // integer overloads
 
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, bool& value, int base = 10) noexcept = delete;
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, char& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, bool& value, int base = 10) noexcept = delete;
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, char& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, signed char& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, signed char& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned char& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned char& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, short& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, short& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned short& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned short& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, int& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned int& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned int& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, long& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, long& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned long& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned long& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, long long& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, long long& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }
-BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned long long& value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars(const char* first, const char* last, unsigned long long& value, int base = 10) noexcept
 {
     return detail::from_chars(first, last, value, base);
 }

From e649c46e3ae2dd3556bca78ea51b8744462c08a5 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 13 Mar 2026 16:06:55 -0500
Subject: [PATCH 2/8] Add CUDA cmake CI runner

---
 .github/workflows/ci.yml | 59 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1490bac7..98db6edc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -765,3 +765,62 @@ jobs:
 
       - name: Run tests
         run: ci/build.sh
+
+
+  cuda-cmake-test:
+    strategy:
+      fail-fast: false
+
+    runs-on: gpu-runner-1
+
+    steps:
+      - uses: Jimver/cuda-toolkit@v0.2.25
+        id: cuda-toolkit
+        with:
+          cuda: '12.8.0'
+          method: 'network'
+          sub-packages: '["nvcc"]'
+
+      - name: Output CUDA information
+        run: |
+          echo "Installed cuda version is: ${{steps.cuda-toolkit.outputs.cuda}}"
+          echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+          nvcc -V
+      - uses: actions/checkout@v4
+
+      - name: Install Packages
+        run: |
+          sudo apt-get install -y cmake make
+      - name: Setup Boost
+        run: |
+          echo GITHUB_REPOSITORY: $GITHUB_REPOSITORY
+          LIBRARY=${GITHUB_REPOSITORY#*/}
+          echo LIBRARY: $LIBRARY
+          echo "LIBRARY=$LIBRARY" >> $GITHUB_ENV
+          echo GITHUB_BASE_REF: $GITHUB_BASE_REF
+          echo GITHUB_REF: $GITHUB_REF
+          REF=${GITHUB_BASE_REF:-$GITHUB_REF}
+          REF=${REF#refs/heads/}
+          echo REF: $REF
+          BOOST_BRANCH=develop && [ "$REF" == "master" ] && BOOST_BRANCH=master || true
+          echo BOOST_BRANCH: $BOOST_BRANCH
+          cd ..
+          git clone -b $BOOST_BRANCH --depth 1 https://github.com/boostorg/boost.git boost-root
+          cd boost-root
+          mkdir -p libs/$LIBRARY
+          cp -r $GITHUB_WORKSPACE/* libs/$LIBRARY
+          git submodule update --init tools/boostdep
+          python3 tools/boostdep/depinst/depinst.py --git_args "--jobs 3" $LIBRARY
+      - name: Configure
+        run: |
+          cd ../boost-root
+          mkdir __build__ && cd __build__
+          cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_CHARCONV_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 -DCMAKE_CUDA_STANDARD=17 ..
+      - name: Build tests
+        run: |
+          cd ../boost-root/__build__
+          cmake --build . --target tests -j $(nproc)
+      - name: Run tests
+        run: |
+          cd ../boost-root/__build__
+          ctest --output-on-failure --no-tests=error

From 833c656c8dabe119dff81d7c756374cec4c3d003 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 13 Mar 2026 16:07:12 -0500
Subject: [PATCH 3/8] Add CUDA testing infrastructure

---
 test/cuda_managed_ptr.hpp | 139 ++++++++++++++++++++++++++++++++++++++
 test/stopwatch.hpp        |  39 +++++++++++
 2 files changed, 178 insertions(+)
 create mode 100644 test/cuda_managed_ptr.hpp
 create mode 100644 test/stopwatch.hpp

diff --git a/test/cuda_managed_ptr.hpp b/test/cuda_managed_ptr.hpp
new file mode 100644
index 00000000..b0eb866f
--- /dev/null
+++ b/test/cuda_managed_ptr.hpp
@@ -0,0 +1,142 @@
+//  Copyright John Maddock 2016.
+//  Copyright Matt Borland 2024.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_CHARCONV_CUDA_MANAGED_PTR_HPP
+#define BOOST_CHARCONV_CUDA_MANAGED_PTR_HPP
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#include <cuda_runtime.h>
+#include <cstddef>   // std::size_t
+#include <iostream>  // std::cerr, std::endl
+#include <stdexcept> // std::runtime_error
+
+class managed_holder_base
+{
+protected:
+   static int count;   // live holder objects, shared by every class derived from this base
+   managed_holder_base() { count += 1; }
+   ~managed_holder_base()
+   {
+      count -= 1;
+      if(count == 0) { cudaDeviceSynchronize(); }   // last holder destroyed: synchronize with the device
+   }
+};
+
+int managed_holder_base::count = 0;
+
+//
+// Reset the device and exit:
+// cudaDeviceReset causes the driver to clean up all state. While
+// not mandatory in normal operation, it is good practice.  It is also
+// needed to ensure correct operation when the application is being
+// profiled. Calling cudaDeviceReset causes all profile data to be
+// flushed before the application exits.
+//
+// We have a global instance of this class, plus instances for each
+// managed pointer.  Last one out the door switches the lights off.
+//
+class cudaResetter
+{
+   static int count;   // number of cudaResetter instances currently alive
+public:
+   cudaResetter() { count += 1; }
+   ~cudaResetter()
+   {
+      if(--count != 0)
+         return;   // other instances remain; only the last one resets the device
+
+      const cudaError_t status = cudaDeviceReset();
+      if(status != cudaSuccess)
+      {
+         std::cerr << "Failed to deinitialize the device! error=" << cudaGetErrorString(status) << std::endl;
+      }
+   }
+};
+
+int cudaResetter::count = 0;
+
+cudaResetter global_resetter;
+
+// Move-only owner of a cudaMallocManaged allocation holding n objects of type T.
+template <class T>
+class cuda_managed_ptr
+{
+   T* data;
+   static const cudaResetter resetter;
+   cuda_managed_ptr(const cuda_managed_ptr&) = delete;
+   cuda_managed_ptr& operator=(cuda_managed_ptr const&) = delete;
+   // Synchronizes with the device, then releases the allocation (if any) and clears the pointer.
+   void free()
+   {
+      if(data)
+      {
+         cudaDeviceSynchronize();
+         cudaError_t err = cudaFree(data);
+         if(err != cudaSuccess)
+         {
+            std::cerr << "Failed to free managed memory! error=" << cudaGetErrorString(err) << std::endl;
+         }
+         data = nullptr;
+      }
+   }
+public:
+   cuda_managed_ptr() : data(nullptr) {}
+   // Allocates managed storage for n objects of T; throws std::runtime_error if the allocation fails.
+   cuda_managed_ptr(std::size_t n) : data(nullptr)
+   {
+      void *ptr = nullptr;
+      const cudaError_t err = cudaMallocManaged(&ptr, n * sizeof(T));
+      if(err != cudaSuccess)
+         throw std::runtime_error(cudaGetErrorString(err));
+      cudaDeviceSynchronize();
+      data = static_cast<T*>(ptr);
+   }
+   // Moves transfer ownership and leave the source empty; self-assignment is a no-op.
+   cuda_managed_ptr(cuda_managed_ptr&& o) noexcept : data(o.data) { o.data = nullptr; }
+   cuda_managed_ptr& operator=(cuda_managed_ptr&& o) noexcept
+   {
+      if(this != &o)
+      {
+         free();
+         data = o.data;
+         o.data = nullptr;
+      }
+      return *this;
+   }
+   ~cuda_managed_ptr() { free(); }
+
+   class managed_holder : managed_holder_base   // counted raw-pointer wrapper returned by get()
+   {
+      T* pdata;
+   public:
+      managed_holder(T* p) : managed_holder_base(), pdata(p) {}
+      managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {}
+      operator T* () { return pdata; }
+      T& operator[] (std::size_t n) { return pdata[n]; }
+   };
+   class const_managed_holder : managed_holder_base   // read-only counterpart returned by get() const
+   {
+      const T* pdata;
+   public:
+      const_managed_holder(T* p) : managed_holder_base(), pdata(p) {}
+      const_managed_holder(const managed_holder& o) : managed_holder_base(), pdata(o.pdata) {}
+      operator const T* () { return pdata; }
+      const T& operator[] (std::size_t n) { return pdata[n]; }
+   };
+
+   managed_holder get() { return managed_holder(data); }
+   const_managed_holder get()const { return data; }
+   T& operator[](std::size_t n) { return data[n]; }
+   const T& operator[](std::size_t n)const { return data[n]; }
+};
+
+template <class T>
+cudaResetter const cuda_managed_ptr<T>::resetter;
+
+#endif // BOOST_CHARCONV_CUDA_MANAGED_PTR_HPP
\ No newline at end of file
diff --git a/test/stopwatch.hpp b/test/stopwatch.hpp
new file mode 100644
index 00000000..ab9a91a7
--- /dev/null
+++ b/test/stopwatch.hpp
@@ -0,0 +1,39 @@
+//  Copyright John Maddock 2016.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_CHARCONV_CUDA_STOPWATCH_HPP
+#define BOOST_CHARCONV_CUDA_STOPWATCH_HPP
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+#include <chrono>
+
+// Minimal timer over any std::chrono-style clock; timing starts at construction.
+template <class Clock>
+struct stopwatch
+{
+    using duration = typename Clock::duration;
+    stopwatch() : m_start(Clock::now()) {}
+    // Seconds, as a double, since construction or the most recent reset().
+    double elapsed() const
+    {
+        const duration t = Clock::now() - m_start;
+        return std::chrono::duration_cast<std::chrono::duration<double>>(t).count();
+    }
+    // Restarts timing from the current instant.
+    void reset()
+    {
+        m_start = Clock::now();
+    }
+
+private:
+    typename Clock::time_point m_start;
+};
+
+typedef stopwatch<std::chrono::high_resolution_clock> watch;
+
+#endif
\ No newline at end of file

From 29487c28e4a643f58d0faded002df2b04449a241 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 13 Mar 2026 16:07:26 -0500
Subject: [PATCH 4/8] Add CUDA tests and CML options

---
 test/CMakeLists.txt                        |  22 +++-
 test/cuda_jamfile                          |  22 ++++
 test/test_from_chars_char.cu               | 114 +++++++++++++++++++++
 test/test_from_chars_int.cu                | 114 +++++++++++++++++++++
 test/test_from_chars_long.cu               | 114 +++++++++++++++++++++
 test/test_from_chars_long_long.cu          | 114 +++++++++++++++++++++
 test/test_from_chars_short.cu              | 114 +++++++++++++++++++++
 test/test_from_chars_signed_char.cu        | 114 +++++++++++++++++++++
 test/test_from_chars_unsigned_char.cu      | 114 +++++++++++++++++++++
 test/test_from_chars_unsigned_int.cu       | 114 +++++++++++++++++++++
 test/test_from_chars_unsigned_long.cu      | 114 +++++++++++++++++++++
 test/test_from_chars_unsigned_long_long.cu | 114 +++++++++++++++++++++
 test/test_from_chars_unsigned_short.cu     | 114 +++++++++++++++++++++
 13 files changed, 1295 insertions(+), 3 deletions(-)
 create mode 100644 test/cuda_jamfile
 create mode 100644 test/test_from_chars_char.cu
 create mode 100644 test/test_from_chars_int.cu
 create mode 100644 test/test_from_chars_long.cu
 create mode 100644 test/test_from_chars_long_long.cu
 create mode 100644 test/test_from_chars_short.cu
 create mode 100644 test/test_from_chars_signed_char.cu
 create mode 100644 test/test_from_chars_unsigned_char.cu
 create mode 100644 test/test_from_chars_unsigned_int.cu
 create mode 100644 test/test_from_chars_unsigned_long.cu
 create mode 100644 test/test_from_chars_unsigned_long_long.cu
 create mode 100644 test/test_from_chars_unsigned_short.cu

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7bf0e6e0..1b6afc06 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -6,8 +6,24 @@ include(BoostTestJamfile OPTIONAL RESULT_VARIABLE HAVE_BOOST_TEST)
 
 if(HAVE_BOOST_TEST)
 
-# https://crascit.com/2015/03/28/enabling-cxx11-in-cmake/
-set(CMAKE_CXX_EXTENSIONS OFF)
-boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::charconv Boost::core Boost::assert)
+    enable_testing()
+
+    if(BOOST_CHARCONV_ENABLE_CUDA)
+
+        message(STATUS "Building Boost.charconv with CUDA")
+
+        find_package(CUDA REQUIRED)
+        enable_language(CUDA)
+        set(CMAKE_CUDA_EXTENSIONS OFF)
+
+        boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::charconv Boost::core Boost::assert ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} )
+
+    else()
+
+        # https://crascit.com/2015/03/28/enabling-cxx11-in-cmake/
+        set(CMAKE_CXX_EXTENSIONS OFF)
+        boost_test_jamfile(FILE Jamfile LINK_LIBRARIES Boost::charconv Boost::core Boost::assert)
+
+    endif()
 
 endif()
diff --git a/test/cuda_jamfile b/test/cuda_jamfile
new file mode 100644
index 00000000..d91cf413
--- /dev/null
+++ b/test/cuda_jamfile
@@ -0,0 +1,22 @@
+# Copyright 2024 Matt Borland
+# Distributed under the Boost Software License, Version 1.0.
+# https://www.boost.org/LICENSE_1_0.txt
+
+import testing ;
+import ../../config/checks/config : requires ;
+
+project : requirements
+    [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
+    ;
+
+run test_from_chars_char.cu ;
+run test_from_chars_signed_char.cu ;
+run test_from_chars_unsigned_char.cu ;
+run test_from_chars_short.cu ;
+run test_from_chars_unsigned_short.cu ;
+run test_from_chars_int.cu ;
+run test_from_chars_unsigned_int.cu ;
+run test_from_chars_long.cu ;
+run test_from_chars_unsigned_long.cu ;
+run test_from_chars_long_long.cu ;
+run test_from_chars_unsigned_long_long.cu ;
diff --git a/test/test_from_chars_char.cu b/test/test_from_chars_char.cu
new file mode 100644
index 00000000..9e2c6cfb
--- /dev/null
+++ b/test/test_from_chars_char.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = char;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x; // flat index of this thread within the grid
+    if (idx < numElements)
+    {
+        const char* first = in_strings + idx * BUF_SIZE;   // input idx starts at its own BUF_SIZE-byte slot
+        const char* last = first + in_lengths[idx];
+        test_type parsed {};
+        boost::charconv::from_chars(first, last, parsed);
+        out[idx] = parsed;
+    }
+}
+
+/**
+ * Host driver: formats random values with to_chars, parses them on the GPU and on the CPU, and requires both to equal the originals
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = static_cast<test_type>(dist(rng));
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // check the results: device must agree with host, and host must round-trip the generated value
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_int.cu b/test/test_from_chars_int.cu
new file mode 100644
index 00000000..a17668b4
--- /dev/null
+++ b/test/test_from_chars_int.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = int;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x; // flat index of this thread within the grid
+    if (idx < numElements)
+    {
+        const char* first = in_strings + idx * BUF_SIZE;   // input idx starts at its own BUF_SIZE-byte slot
+        const char* last = first + in_lengths[idx];
+        test_type parsed {};
+        boost::charconv::from_chars(first, last, parsed);
+        out[idx] = parsed;
+    }
+}
+
+/**
+ * Host driver: formats random values with to_chars, parses them on the GPU and on the CPU, and requires both to equal the originals
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used, and compute its size
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Initialize the input vectors
+    std::uniform_int_distribution<int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify that the result vector is correct
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // check the results: device must agree with host, and host must round-trip the generated value
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_long.cu b/test/test_from_chars_long.cu
new file mode 100644
index 00000000..e1ad27eb
--- /dev/null
+++ b/test/test_from_chars_long.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = long;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    // Each thread parses one string; threads whose index is past the last element have nothing to do.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+
+    const char* const first = in_strings + idx * BUF_SIZE; // string idx starts at byte idx * BUF_SIZE
+    const char* const last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: formats random values as text, parses them on the GPU and on the host, and compares the results
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to format and parse
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors: a BUF_SIZE-byte slot per string, plus the length of each string
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Initialize the input vectors; expected[i] keeps the value that string i was formatted from
+    std::uniform_int_distribution<long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i) // int index: same type as numElements, no signed/unsigned comparison
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel; the block count is rounded up so that every element gets a thread
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, both as a reference result and to time the non-CUDA path
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // The device must agree with the host, and both must reproduce the value the string was formatted from
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_long_long.cu b/test/test_from_chars_long_long.cu
new file mode 100644
index 00000000..ee9ec056
--- /dev/null
+++ b/test/test_from_chars_long_long.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = long long;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    // Each thread parses one string; threads whose index is past the last element have nothing to do.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+
+    const char* const first = in_strings + idx * BUF_SIZE; // string idx starts at byte idx * BUF_SIZE
+    const char* const last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: formats random values as text, parses them on the GPU and on the host, and compares the results
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to format and parse
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors: a BUF_SIZE-byte slot per string, plus the length of each string
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Initialize the input vectors; expected[i] keeps the value that string i was formatted from
+    std::uniform_int_distribution<long long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i) // int index: same type as numElements, no signed/unsigned comparison
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel; the block count is rounded up so that every element gets a thread
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, both as a reference result and to time the non-CUDA path
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // The device must agree with the host, and both must reproduce the value the string was formatted from
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_short.cu b/test/test_from_chars_short.cu
new file mode 100644
index 00000000..8cdec393
--- /dev/null
+++ b/test/test_from_chars_short.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = short;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    // Each thread parses one string; threads whose index is past the last element have nothing to do.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+
+    const char* const first = in_strings + idx * BUF_SIZE; // string idx starts at byte idx * BUF_SIZE
+    const char* const last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: formats random values as text, parses them on the GPU and on the host, and compares the results
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to format and parse
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors: a BUF_SIZE-byte slot per string, plus the length of each string
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Initialize the input vectors; expected[i] keeps the value that string i was formatted from
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i) // int index: same type as numElements, no signed/unsigned comparison
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel; the block count is rounded up so that every element gets a thread
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, both as a reference result and to time the non-CUDA path
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // The device must agree with the host, and both must reproduce the value the string was formatted from
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_signed_char.cu b/test/test_from_chars_signed_char.cu
new file mode 100644
index 00000000..df064971
--- /dev/null
+++ b/test/test_from_chars_signed_char.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = signed char;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    // Each thread parses one string; threads whose index is past the last element have nothing to do.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+
+    const char* const first = in_strings + idx * BUF_SIZE; // string idx starts at byte idx * BUF_SIZE
+    const char* const last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: formats random values as text, parses them on the GPU and on the host, and compares the results
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to format and parse
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors: a BUF_SIZE-byte slot per string, plus the length of each string
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Initialize the input vectors; values are drawn as short over test_type's range, then narrowed into expected[i]
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i) // int index: same type as numElements, no signed/unsigned comparison
+    {
+        expected[i] = static_cast<test_type>(dist(rng));
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel; the block count is rounded up so that every element gets a thread
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, both as a reference result and to time the non-CUDA path
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // The device must agree with the host, and both must reproduce the value the string was formatted from
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_unsigned_char.cu b/test/test_from_chars_unsigned_char.cu
new file mode 100644
index 00000000..3ec966a0
--- /dev/null
+++ b/test/test_from_chars_unsigned_char.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned char;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    // Each thread parses one string; threads whose index is past the last element have nothing to do.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+
+    const char* const first = in_strings + idx * BUF_SIZE; // string idx starts at byte idx * BUF_SIZE
+    const char* const last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: formats random values as text, parses them on the GPU and on the host, and compares the results
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to format and parse
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors: a BUF_SIZE-byte slot per string, plus the length of each string
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Initialize the input vectors; values are drawn as unsigned short over test_type's range, then narrowed into expected[i]
+    std::uniform_int_distribution<unsigned short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i) // int index: same type as numElements, no signed/unsigned comparison
+    {
+        expected[i] = static_cast<test_type>(dist(rng));
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel; the block count is rounded up so that every element gets a thread
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, both as a reference result and to time the non-CUDA path
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // The device must agree with the host, and both must reproduce the value the string was formatted from
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_unsigned_int.cu b/test/test_from_chars_unsigned_int.cu
new file mode 100644
index 00000000..40ce8f81
--- /dev/null
+++ b/test/test_from_chars_unsigned_int.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned int;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    // Each thread parses one string; threads whose index is past the last element have nothing to do.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+
+    const char* const first = in_strings + idx * BUF_SIZE; // string idx starts at byte idx * BUF_SIZE
+    const char* const last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: formats random values as text, parses them on the GPU and on the host, and compares the results
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to format and parse
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors: a BUF_SIZE-byte slot per string, plus the length of each string
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Initialize the input vectors; expected[i] keeps the value that string i was formatted from
+    std::uniform_int_distribution<unsigned int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i) // int index: same type as numElements, no signed/unsigned comparison
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel; the block count is rounded up so that every element gets a thread
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, both as a reference result and to time the non-CUDA path
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // The device must agree with the host, and both must reproduce the value the string was formatted from
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_unsigned_long.cu b/test/test_from_chars_unsigned_long.cu
new file mode 100644
index 00000000..d2f773e6
--- /dev/null
+++ b/test/test_from_chars_unsigned_long.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned long;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    // Each thread parses one string; threads whose index is past the last element have nothing to do.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+
+    const char* const first = in_strings + idx * BUF_SIZE; // string idx starts at byte idx * BUF_SIZE
+    const char* const last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: formats random values as text, parses them on the GPU and on the host, and compares the results
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to format and parse
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors: a BUF_SIZE-byte slot per string, plus the length of each string
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Initialize the input vectors; expected[i] keeps the value that string i was formatted from
+    std::uniform_int_distribution<unsigned long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i) // int index: same type as numElements, no signed/unsigned comparison
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel; the block count is rounded up so that every element gets a thread
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, both as a reference result and to time the non-CUDA path
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // The device must agree with the host, and both must reproduce the value the string was formatted from
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_unsigned_long_long.cu b/test/test_from_chars_unsigned_long_long.cu
new file mode 100644
index 00000000..009d4c7a
--- /dev/null
+++ b/test/test_from_chars_unsigned_long_long.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned long long;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    // Each thread parses one string; threads whose index is past the last element have nothing to do.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+
+    const char* const first = in_strings + idx * BUF_SIZE; // string idx starts at byte idx * BUF_SIZE
+    const char* const last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: formats random values as text, parses them on the GPU and on the host, and compares the results
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to format and parse
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vectors: a BUF_SIZE-byte slot per string, plus the length of each string
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Initialize the input vectors; expected[i] keeps the value that string i was formatted from
+    std::uniform_int_distribution<unsigned long long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i) // int index: same type as numElements, no signed/unsigned comparison
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel; the block count is rounded up so that every element gets a thread
+    int threadsPerBlock = 256;
+    int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host, both as a reference result and to time the non-CUDA path
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // The device must agree with the host, and both must reproduce the value the string was formatted from
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_unsigned_short.cu b/test/test_from_chars_unsigned_short.cu
new file mode 100644
index 00000000..ae280a5a
--- /dev/null
+++ b/test/test_from_chars_unsigned_short.cu
@@ -0,0 +1,114 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = unsigned short;
+
+constexpr int BUF_SIZE = 32;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements)
+{
+    // The flat global thread index selects the input slot; threads past the end exit early.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+    // Slot idx starts at idx * BUF_SIZE and holds in_lengths[idx] characters.
+    const char* first = in_strings + idx * BUF_SIZE;
+    const char* last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: writes random values as text, parses them on the GPU and on the host, and checks both against the originals
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to convert; the kernel handles one value per thread
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    // Write each random value into its own BUF_SIZE-byte slot and record the text length
+    std::uniform_int_distribution<unsigned short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+    for (int i = 0; i < numElements; ++i)
+    {
+        expected[i] = dist(rng);
+        char* buf = &input_strings[i * BUF_SIZE];
+        auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i]);
+        input_lengths[i] = static_cast<int>(res.ptr - buf);
+    }
+
+    // Launch the CUDA Kernel with enough blocks to cover every element
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements);
+    cudaDeviceSynchronize();
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Parse the same strings on the host as a reference (timed for comparison)
+    std::vector<test_type> results;
+    results.reserve(numElements);
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        test_type val {};
+        const char* str = &input_strings[i * BUF_SIZE];
+        boost::charconv::from_chars(str, str + input_lengths[i], val);
+        results.push_back(val);
+    }
+    double t = w.elapsed();
+    // The device result must agree with the host parse, and both with the value that was written
+    for(int i = 0; i < numElements; ++i)
+    {
+        if (output_vector[i] != results[i] || results[i] != expected[i])
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}

From ac110be7295d7c60f63b6d3d6e64964b037d7fd9 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Fri, 13 Mar 2026 16:50:38 -0500
Subject: [PATCH 5/8] Add testing with bases from 2 to 36

---
 test/cuda_jamfile                             |  12 ++
 test/test_from_chars_bases_char.cu            | 121 ++++++++++++++++++
 test/test_from_chars_bases_int.cu             | 121 ++++++++++++++++++
 test/test_from_chars_bases_long.cu            | 121 ++++++++++++++++++
 test/test_from_chars_bases_long_long.cu       | 121 ++++++++++++++++++
 test/test_from_chars_bases_short.cu           | 121 ++++++++++++++++++
 test/test_from_chars_bases_signed_char.cu     | 121 ++++++++++++++++++
 test/test_from_chars_bases_unsigned_char.cu   | 121 ++++++++++++++++++
 test/test_from_chars_bases_unsigned_int.cu    | 121 ++++++++++++++++++
 test/test_from_chars_bases_unsigned_long.cu   | 121 ++++++++++++++++++
 ...est_from_chars_bases_unsigned_long_long.cu | 121 ++++++++++++++++++
 test/test_from_chars_bases_unsigned_short.cu  | 121 ++++++++++++++++++
 12 files changed, 1343 insertions(+)
 create mode 100644 test/test_from_chars_bases_char.cu
 create mode 100644 test/test_from_chars_bases_int.cu
 create mode 100644 test/test_from_chars_bases_long.cu
 create mode 100644 test/test_from_chars_bases_long_long.cu
 create mode 100644 test/test_from_chars_bases_short.cu
 create mode 100644 test/test_from_chars_bases_signed_char.cu
 create mode 100644 test/test_from_chars_bases_unsigned_char.cu
 create mode 100644 test/test_from_chars_bases_unsigned_int.cu
 create mode 100644 test/test_from_chars_bases_unsigned_long.cu
 create mode 100644 test/test_from_chars_bases_unsigned_long_long.cu
 create mode 100644 test/test_from_chars_bases_unsigned_short.cu

diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index d91cf413..be30752e 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -20,3 +20,15 @@ run test_from_chars_long.cu ;
 run test_from_chars_unsigned_long.cu ;
 run test_from_chars_long_long.cu ;
 run test_from_chars_unsigned_long_long.cu ;
+
+run test_from_chars_bases_char.cu ;
+run test_from_chars_bases_signed_char.cu ;
+run test_from_chars_bases_unsigned_char.cu ;
+run test_from_chars_bases_short.cu ;
+run test_from_chars_bases_unsigned_short.cu ;
+run test_from_chars_bases_int.cu ;
+run test_from_chars_bases_unsigned_int.cu ;
+run test_from_chars_bases_long.cu ;
+run test_from_chars_bases_unsigned_long.cu ;
+run test_from_chars_bases_long_long.cu ;
+run test_from_chars_bases_unsigned_long_long.cu ;
diff --git a/test/test_from_chars_bases_char.cu b/test/test_from_chars_bases_char.cu
new file mode 100644
index 00000000..3a38a8d8
--- /dev/null
+++ b/test/test_from_chars_bases_char.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = char;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    // The flat global thread index selects the input slot; threads past the end exit early.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+    // Slot idx starts at idx * BUF_SIZE and holds in_lengths[idx] characters written in `base`.
+    const char* first = in_strings + idx * BUF_SIZE;
+    const char* last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: for every base 2..36, writes random values as text, parses them on the GPU and host, and checks both against the originals
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to convert per base; the kernel handles one value per thread
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Write each random value, formatted in this base, into its own BUF_SIZE-byte slot
+        for (int i = 0; i < numElements; ++i)
+        {
+            expected[i] = static_cast<test_type>(dist(rng));
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host as a reference (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must agree with the host parse, and both with the value that was written
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || results[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_int.cu b/test/test_from_chars_bases_int.cu
new file mode 100644
index 00000000..25e7d9c2
--- /dev/null
+++ b/test/test_from_chars_bases_int.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = int;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    // The flat global thread index selects the input slot; threads past the end exit early.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+    // Slot idx starts at idx * BUF_SIZE and holds in_lengths[idx] characters written in `base`.
+    const char* first = in_strings + idx * BUF_SIZE;
+    const char* last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: for every base 2..36, writes random values as text, parses them on the GPU and host, and checks both against the originals
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to convert per base; the kernel handles one value per thread
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+
+    std::uniform_int_distribution<int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Write each random value, formatted in this base, into its own BUF_SIZE-byte slot
+        for (int i = 0; i < numElements; ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host as a reference (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must agree with the host parse, and both with the value that was written
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || results[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_long.cu b/test/test_from_chars_bases_long.cu
new file mode 100644
index 00000000..5b50f1aa
--- /dev/null
+++ b/test/test_from_chars_bases_long.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = long;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    // The flat global thread index selects the input slot; threads past the end exit early.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+    // Slot idx starts at idx * BUF_SIZE and holds in_lengths[idx] characters written in `base`.
+    const char* first = in_strings + idx * BUF_SIZE;
+    const char* last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: for every base 2..36, writes random values as text, parses them on the GPU and host, and checks both against the originals
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to convert per base; the kernel handles one value per thread
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+
+    std::uniform_int_distribution<long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Write each random value, formatted in this base, into its own BUF_SIZE-byte slot
+        for (int i = 0; i < numElements; ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host as a reference (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must agree with the host parse, and both with the value that was written
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || results[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_long_long.cu b/test/test_from_chars_bases_long_long.cu
new file mode 100644
index 00000000..6926dc57
--- /dev/null
+++ b/test/test_from_chars_bases_long_long.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = long long;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    // The flat global thread index selects the input slot; threads past the end exit early.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+    // Slot idx starts at idx * BUF_SIZE and holds in_lengths[idx] characters written in `base`.
+    const char* first = in_strings + idx * BUF_SIZE;
+    const char* last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: for every base 2..36, writes random values as text, parses them on the GPU and host, and checks both against the originals
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to convert per base; the kernel handles one value per thread
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+
+    std::uniform_int_distribution<long long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Write each random value, formatted in this base, into its own BUF_SIZE-byte slot
+        for (int i = 0; i < numElements; ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host as a reference (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must agree with the host parse, and both with the value that was written
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || results[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_short.cu b/test/test_from_chars_bases_short.cu
new file mode 100644
index 00000000..f9ee2852
--- /dev/null
+++ b/test/test_from_chars_bases_short.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = short;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    // The flat global thread index selects the input slot; threads past the end exit early.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+    // Slot idx starts at idx * BUF_SIZE and holds in_lengths[idx] characters written in `base`.
+    const char* first = in_strings + idx * BUF_SIZE;
+    const char* last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: for every base 2..36, writes random values as text, parses them on the GPU and host, and checks both against the originals
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to convert per base; the kernel handles one value per thread
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Write each random value, formatted in this base, into its own BUF_SIZE-byte slot
+        for (int i = 0; i < numElements; ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host as a reference (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must agree with the host parse, and both with the value that was written
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || results[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_signed_char.cu b/test/test_from_chars_bases_signed_char.cu
new file mode 100644
index 00000000..9df80852
--- /dev/null
+++ b/test/test_from_chars_bases_signed_char.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = signed char;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    // The flat global thread index selects the input slot; threads past the end exit early.
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; }
+    // Slot idx starts at idx * BUF_SIZE and holds in_lengths[idx] characters written in `base`.
+    const char* first = in_strings + idx * BUF_SIZE;
+    const char* last = first + in_lengths[idx];
+    test_type parsed {};
+    boost::charconv::from_chars(first, last, parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host main routine: for every base 2..36, writes random values as text, parses them on the GPU and host, and checks both against the originals
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values to convert per base; the kernel handles one value per thread
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Write each random value, formatted in this base, into its own BUF_SIZE-byte slot
+        for (int i = 0; i < numElements; ++i)
+        {
+            expected[i] = static_cast<test_type>(dist(rng));
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same strings on the host as a reference (timed for comparison)
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // The device result must agree with the host parse, and both with the value that was written
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || results[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_unsigned_char.cu b/test/test_from_chars_bases_unsigned_char.cu
new file mode 100644
index 00000000..15dda9d2
--- /dev/null
+++ b/test/test_from_chars_bases_unsigned_char.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = unsigned char;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x; // global thread index doubles as the element index
+    if (idx >= numElements)
+    {
+        return; // threads past the end of the input have no element to parse
+    }
+    const char* const first = in_strings + idx * BUF_SIZE; // each input string starts at a multiple of BUF_SIZE
+    test_type parsed {};
+    boost::charconv::from_chars(first, first + in_lengths[idx], parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host driver: for each base in [2, 36], formats random values with to_chars, parses them on the GPU with from_chars, and checks every result against both the host parse and the original value.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-wide slot per string, plus the length used within each slot
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // rounded up so every element gets a thread
+
+    std::uniform_int_distribution<unsigned short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors: format a fresh random value into each slot and record its length
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = static_cast<test_type>(dist(rng));
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same inputs on the host, timing the loop for comparison with the kernel
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // check the device result against the host parse and against the value that was originally formatted
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_unsigned_int.cu b/test/test_from_chars_bases_unsigned_int.cu
new file mode 100644
index 00000000..951137b4
--- /dev/null
+++ b/test/test_from_chars_bases_unsigned_int.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = unsigned int;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x; // global thread index doubles as the element index
+    if (idx >= numElements)
+    {
+        return; // threads past the end of the input have no element to parse
+    }
+    const char* const first = in_strings + idx * BUF_SIZE; // each input string starts at a multiple of BUF_SIZE
+    test_type parsed {};
+    boost::charconv::from_chars(first, first + in_lengths[idx], parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host driver: for each base in [2, 36], formats random values with to_chars, parses them on the GPU with from_chars, and checks every result against both the host parse and the original value.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-wide slot per string, plus the length used within each slot
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // rounded up so every element gets a thread
+
+    std::uniform_int_distribution<unsigned int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors: format a fresh random value into each slot and record its length
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same inputs on the host, timing the loop for comparison with the kernel
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // check the device result against the host parse and against the value that was originally formatted
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_unsigned_long.cu b/test/test_from_chars_bases_unsigned_long.cu
new file mode 100644
index 00000000..8e5cc069
--- /dev/null
+++ b/test/test_from_chars_bases_unsigned_long.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = unsigned long;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x; // global thread index doubles as the element index
+    if (idx >= numElements)
+    {
+        return; // threads past the end of the input have no element to parse
+    }
+    const char* const first = in_strings + idx * BUF_SIZE; // each input string starts at a multiple of BUF_SIZE
+    test_type parsed {};
+    boost::charconv::from_chars(first, first + in_lengths[idx], parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host driver: for each base in [2, 36], formats random values with to_chars, parses them on the GPU with from_chars, and checks every result against both the host parse and the original value.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-wide slot per string, plus the length used within each slot
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // rounded up so every element gets a thread
+
+    std::uniform_int_distribution<unsigned long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors: format a fresh random value into each slot and record its length
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same inputs on the host, timing the loop for comparison with the kernel
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // check the device result against the host parse and against the value that was originally formatted
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_unsigned_long_long.cu b/test/test_from_chars_bases_unsigned_long_long.cu
new file mode 100644
index 00000000..3c848d72
--- /dev/null
+++ b/test/test_from_chars_bases_unsigned_long_long.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = unsigned long long;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x; // global thread index doubles as the element index
+    if (idx >= numElements)
+    {
+        return; // threads past the end of the input have no element to parse
+    }
+    const char* const first = in_strings + idx * BUF_SIZE; // each input string starts at a multiple of BUF_SIZE
+    test_type parsed {};
+    boost::charconv::from_chars(first, first + in_lengths[idx], parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host driver: for each base in [2, 36], formats random values with to_chars, parses them on the GPU with from_chars, and checks every result against both the host parse and the original value.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-wide slot per string, plus the length used within each slot
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // rounded up so every element gets a thread
+
+    std::uniform_int_distribution<unsigned long long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors: format a fresh random value into each slot and record its length
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same inputs on the host, timing the loop for comparison with the kernel
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // check the device result against the host parse and against the value that was originally formatted
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_from_chars_bases_unsigned_short.cu b/test/test_from_chars_bases_unsigned_short.cu
new file mode 100644
index 00000000..b7e6649e
--- /dev/null
+++ b/test/test_from_chars_bases_unsigned_short.cu
@@ -0,0 +1,121 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <boost/charconv/from_chars.hpp>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = unsigned short;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const char *in_strings, const int *in_lengths, test_type *out, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x; // global thread index doubles as the element index
+    if (idx >= numElements)
+    {
+        return; // threads past the end of the input have no element to parse
+    }
+    const char* const first = in_strings + idx * BUF_SIZE; // each input string starts at a multiple of BUF_SIZE
+    test_type parsed {};
+    boost::charconv::from_chars(first, first + in_lengths[idx], parsed, base);
+    out[idx] = parsed;
+}
+
+/**
+ * Host driver: for each base in [2, 36], formats random values with to_chars, parses them on the GPU with from_chars, and checks every result against both the host parse and the original value.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values generated and parsed for each base
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input buffers: one BUF_SIZE-wide slot per string, plus the length used within each slot
+    cuda_managed_ptr<char> input_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> input_lengths(numElements);
+
+    // Allocate the managed output vector
+    cuda_managed_ptr<test_type> output_vector(numElements);
+
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // rounded up so every element gets a thread
+
+    std::uniform_int_distribution<unsigned short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    std::vector<test_type> expected(numElements);
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Initialize the input vectors: format a fresh random value into each slot and record its length
+        for (std::size_t i = 0; i < static_cast<std::size_t>(numElements); ++i)
+        {
+            expected[i] = dist(rng);
+            char* buf = &input_strings[i * BUF_SIZE];
+            auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, expected[i], base);
+            input_lengths[i] = static_cast<int>(res.ptr - buf);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_strings.get(), input_lengths.get(), output_vector.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Parse the same inputs on the host, timing the loop for comparison with the kernel
+        std::vector<test_type> results;
+        results.reserve(numElements);
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            test_type val {};
+            const char* str = &input_strings[i * BUF_SIZE];
+            boost::charconv::from_chars(str, str + input_lengths[i], val, base);
+            results.push_back(val);
+        }
+        double t = w.elapsed();
+        // check the device result against the host parse and against the value that was originally formatted
+        for(int i = 0; i < numElements; ++i)
+        {
+            if (output_vector[i] != results[i] || output_vector[i] != expected[i])
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}

From 9f4faa32c7a9d5d3db7e943fc61177c1be8f00c5 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 16 Mar 2026 09:48:56 -0500
Subject: [PATCH 6/8] Disallow builtin 128-bit integers on device

---
 include/boost/charconv/detail/config.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/boost/charconv/detail/config.hpp b/include/boost/charconv/detail/config.hpp
index 0badc50b..59b71ba6 100644
--- a/include/boost/charconv/detail/config.hpp
+++ b/include/boost/charconv/detail/config.hpp
@@ -20,7 +20,7 @@
 #endif
 
 // Use 128-bit integers and suppress warnings for using extensions
-#if defined(BOOST_HAS_INT128)
+#if defined(BOOST_HAS_INT128) && !defined(__NVCC__)
 #  define BOOST_CHARCONV_HAS_INT128
 #  define BOOST_CHARCONV_INT128_MAX  static_cast<boost::int128_type>((static_cast<boost::uint128_type>(1) << 127) - 1)
 #  define BOOST_CHARCONV_INT128_MIN  (-BOOST_CHARCONV_INT128_MAX - 1)

From eb191f123291a56cd6c45ddb868dfe49ece722e4 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 16 Mar 2026 10:42:38 -0500
Subject: [PATCH 7/8] Disable emulated128 path on device

---
 include/boost/charconv/detail/from_chars_integer_impl.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/boost/charconv/detail/from_chars_integer_impl.hpp b/include/boost/charconv/detail/from_chars_integer_impl.hpp
index fd94f286..56f11b75 100644
--- a/include/boost/charconv/detail/from_chars_integer_impl.hpp
+++ b/include/boost/charconv/detail/from_chars_integer_impl.hpp
@@ -396,10 +396,12 @@ BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars128(const char* first,
 }
 #endif
 
+#ifndef __NVCC__
 BOOST_CHARCONV_GCC5_CONSTEXPR from_chars_result from_chars128(const char* first, const char* last, uint128& value, int base = 10) noexcept
 {
     return from_chars_integer_impl<uint128, uint128>(first, last, value, base);
 }
+#endif // __NVCC__
 
 }}} // Namespaces
 

From 977fb1fbafe39964585a7da03e6ee8790f14d013 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Mon, 16 Mar 2026 11:23:39 -0500
Subject: [PATCH 8/8] Replace uses of numeric limits on device

---
 .../detail/from_chars_integer_impl.hpp        | 29 ++++++++++++++++---
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/include/boost/charconv/detail/from_chars_integer_impl.hpp b/include/boost/charconv/detail/from_chars_integer_impl.hpp
index 56f11b75..4c95cf3f 100644
--- a/include/boost/charconv/detail/from_chars_integer_impl.hpp
+++ b/include/boost/charconv/detail/from_chars_integer_impl.hpp
@@ -137,6 +137,27 @@ BOOST_CHARCONV_HOST_DEVICE constexpr unsigned char digit_from_char(const char va
 
 #endif
 
+#ifdef __NVCC__
+
+// Maximum of T computed without std::numeric_limits: every bit of the unsigned counterpart set, shifted right once when T is signed.
+template <typename T>
+__host__ __device__ constexpr T get_max_value()
+{
+    using unsigned_t = typename std::make_unsigned<T>::type;
+    return static_cast<T>(static_cast<unsigned_t>(~static_cast<unsigned_t>(0)) >> (std::is_signed<T>::value ? 1 : 0));
+}
+
+#else
+
+// Outside NVCC builds the maximum comes straight from std::numeric_limits.
+template <typename T>
+constexpr T get_max_value()
+{
+    return (std::numeric_limits<T>::max)();
+}
+
+#endif
+
 template <typename Integer, typename Unsigned_Integer>
 BOOST_CHARCONV_HOST_DEVICE BOOST_CXX14_CONSTEXPR from_chars_result from_chars_integer_impl(const char* first, const char* last, Integer& value, int base) noexcept
 {
@@ -226,8 +247,8 @@ BOOST_CHARCONV_HOST_DEVICE BOOST_CXX14_CONSTEXPR from_chars_result from_chars_in
         else
         #endif
         {
-            overflow_value = static_cast<Unsigned_Integer>((std::numeric_limits<Integer>::max)());
-            max_digit = static_cast<Unsigned_Integer>((std::numeric_limits<Integer>::max)());
+            overflow_value = static_cast<Unsigned_Integer>(get_max_value<Integer>());
+            max_digit = static_cast<Unsigned_Integer>(get_max_value<Integer>());
         }
 
         if (is_negative)
@@ -252,8 +273,8 @@ BOOST_CHARCONV_HOST_DEVICE BOOST_CXX14_CONSTEXPR from_chars_result from_chars_in
         else
         #endif
         {
-            overflow_value = (std::numeric_limits<Unsigned_Integer>::max)();
-            max_digit = (std::numeric_limits<Unsigned_Integer>::max)();
+            overflow_value = get_max_value<Unsigned_Integer>();
+            max_digit = get_max_value<Unsigned_Integer>();
         }
     }