Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,21 +80,21 @@ jobs:
fi
git config --global pack.threads 0

- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
# For coverage builds fetch the whole history, else only 1 commit using a 'fake ternary'
fetch-depth: ${{ matrix.coverage && '0' || '1' }}

- name: Cache ccache
uses: actions/cache@v3
uses: actions/cache@v4
if: env.B2_USE_CCACHE
with:
path: ~/.ccache
key: ${{matrix.os}}-${{matrix.container}}-${{matrix.compiler}}-${{github.sha}}
restore-keys: ${{matrix.os}}-${{matrix.container}}-${{matrix.compiler}}-

- name: Fetch Boost.CI
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
repository: boostorg/boost-ci
ref: master
Expand Down
14 changes: 14 additions & 0 deletions include/boost/int128/detail/conversions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,20 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t::uint128_t(const int128_t& v) noexc

#endif // BOOST_INT128_ENDIAN_LITTLE_BYTE

//=====================================
// Conversion Operators
//=====================================

BOOST_INT128_HOST_DEVICE constexpr int128_t::operator uint128_t() const noexcept
{
return uint128_t{static_cast<std::uint64_t>(this->high), static_cast<std::uint64_t>(this->low)};
}

BOOST_INT128_HOST_DEVICE constexpr uint128_t::operator int128_t() const noexcept
{
return int128_t{static_cast<std::int64_t>(this->high), static_cast<std::uint64_t>(this->low)};
}

//=====================================
// Comparison Operators
//=====================================
Expand Down
1 change: 1 addition & 0 deletions include/boost/int128/detail/int128_imp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ int128_t

// Requires a conversion file to be implemented
BOOST_INT128_HOST_DEVICE explicit constexpr int128_t(const uint128_t& v) noexcept;
BOOST_INT128_HOST_DEVICE explicit constexpr operator uint128_t() const noexcept;

// Construct from integral types
#if BOOST_INT128_ENDIAN_LITTLE_BYTE
Expand Down
1 change: 1 addition & 0 deletions include/boost/int128/detail/uint128_imp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ uint128_t

// Requires a conversion file to be implemented
BOOST_INT128_HOST_DEVICE explicit constexpr uint128_t(const int128_t& v) noexcept;
BOOST_INT128_HOST_DEVICE explicit constexpr operator int128_t() const noexcept;

// Construct from integral types
#if BOOST_INT128_ENDIAN_LITTLE_BYTE
Expand Down
83 changes: 42 additions & 41 deletions include/boost/int128/numeric.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,6 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr uint128_t sub_sat(const u
return z;
}

BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t add_sat(int128_t x, int128_t y) noexcept;
BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t sub_sat(int128_t x, int128_t y) noexcept;

#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable : 4307) // Addition Overflow
Expand All @@ -87,54 +84,52 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t sub_sat(int128_t

BOOST_INT128_HOST_DEVICE constexpr int128_t add_sat(const int128_t x, const int128_t y) noexcept
{
if (x >= 0 && y >= 0)
{
constexpr auto max_value {static_cast<uint128_t>((std::numeric_limits<int128_t>::max)())};
const auto big_x {static_cast<uint128_t>(x)};
const auto big_y {static_cast<uint128_t>(y)};
const auto big_res {big_x + big_y};
// Detect overflow BEFORE the addition to avoid signed overflow UB.
// When both are non-negative: overflow iff x > max - y (subtraction safe: max - non_negative >= 0)
// When both are negative: overflow iff x < min - y (subtraction safe: min - negative > min)
// Mixed signs: overflow is impossible.

return big_res > max_value ? (std::numeric_limits<int128_t>::max)() : static_cast<int128_t>(big_res);
}
else if ((x < 0 && y > 0) || (x > 0 && y < 0))
if (x.high >= 0 && y.high >= 0)
{
return x + y;
if (x > (std::numeric_limits<int128_t>::max)() - y)
{
return (std::numeric_limits<int128_t>::max)();
}
}
else
else if (x.high < 0 && y.high < 0)
{
// x < 0 and y < 0
// Nearly the same technique as the positive values case
constexpr auto max_value {-static_cast<uint128_t>((std::numeric_limits<int128_t>::min)())};
const auto big_x {static_cast<uint128_t>(abs(x))};
const auto big_y {static_cast<uint128_t>(abs(y))};
const auto big_res {big_x + big_y};

return big_res > max_value ? (std::numeric_limits<int128_t>::min)() : -static_cast<int128_t>(big_res);
if (x < (std::numeric_limits<int128_t>::min)() - y)
{
return (std::numeric_limits<int128_t>::min)();
}
}

return x + y;
}

BOOST_INT128_HOST_DEVICE constexpr int128_t sub_sat(const int128_t x, const int128_t y) noexcept
{
if (x <= 0 && y >= 0)
{
// Underflow case
const auto res {x - y};
return res > x ? (std::numeric_limits<int128_t>::min)() : res;
}
else if (x > 0 && y < 0)
{
// Overflow Case
constexpr auto max_val {static_cast<uint128_t>((std::numeric_limits<int128_t>::max)())};
const auto big_x {static_cast<uint128_t>(x)};
const auto big_y {-static_cast<uint128_t>(y)};
const auto res {big_x + big_y};
// Detect overflow BEFORE the subtraction to avoid signed overflow UB.
// Positive overflow: x >= 0 and y < 0 and x > max + y (safe: max + negative < max)
// Negative overflow: x < 0 and y >= 0 and x < min + y (safe: min + non_negative > min)
// Same signs: overflow is impossible.

return (res > max_val || res < big_x) ? (std::numeric_limits<int128_t>::max)() : static_cast<int128_t>(res);
if (x.high >= 0 && y.high < 0)
{
if (x > (std::numeric_limits<int128_t>::max)() + y)
{
return (std::numeric_limits<int128_t>::max)();
}
}
else
else if (x.high < 0 && y.high >= 0)
{
return x - y;
if (x < (std::numeric_limits<int128_t>::min)() + y)
{
return (std::numeric_limits<int128_t>::min)();
}
}

return x - y;
}

#ifdef _MSC_VER
Expand Down Expand Up @@ -404,11 +399,17 @@ BOOST_INT128_HOST_DEVICE constexpr int128_t midpoint(const int128_t a, const int
// For signed integers, we use a + (b - a) / 2 or a - (a - b) / 2
// The subtraction is done in unsigned arithmetic to handle overflow correctly
// Integer division automatically rounds toward the first argument
//
// Use direct field access for both the uint128 construction and the
// comparison to avoid NVCC host compiler issues with operator<= and
// static_cast on int128_t for large-magnitude values

const uint128_t ua {static_cast<std::uint64_t>(a.high), a.low};
const uint128_t ub {static_cast<std::uint64_t>(b.high), b.low};

const auto ua {static_cast<uint128_t>(a)};
const auto ub {static_cast<uint128_t>(b)};
const bool a_le_b {a.high == b.high ? a.low <= b.low : a.high < b.high};

if (a <= b)
if (a_le_b)
{
// diff = b - a (computed in unsigned, handles wrap-around correctly)
const auto diff {ub - ua};
Expand Down
15 changes: 15 additions & 0 deletions test/cuda_jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,18 @@ run test_signed_cstdlib_div.cu ;

run test_signed_to_unsigned_conversion.cu ;
run test_unsigned_to_signed_conversion.cu ;

run test_unsigned_add_sat.cu ;
run test_signed_add_sat.cu ;
run test_unsigned_sub_sat.cu ;
run test_signed_sub_sat.cu ;
run test_unsigned_mul_sat.cu ;
run test_signed_mul_sat.cu ;
run test_unsigned_div_sat.cu ;
run test_signed_div_sat.cu ;
run test_unsigned_gcd.cu ;
run test_signed_gcd.cu ;
run test_unsigned_lcm.cu ;
run test_signed_lcm.cu ;
run test_unsigned_midpoint.cu ;
run test_signed_midpoint.cu ;
106 changes: 106 additions & 0 deletions test/test_signed_add_sat.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Copyright Matt Borland 2026.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_INT128_ALLOW_SIGN_CONVERSION

#include <iostream>
#include <vector>
#include <random>
#include <limits>
#include <boost/int128.hpp>
#include <boost/int128/numeric.hpp>
#include <boost/int128/random.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

#include <cuda_runtime.h>

using test_type = boost::int128::int128_t;

__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;

if (i < numElements)
{
out[i] = boost::int128::add_sat(in[i], in2[i]);
}
}

int main(void)
{
std::mt19937_64 rng {42};

cudaError_t err = cudaSuccess;

int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

cuda_managed_ptr<test_type> input_vector(numElements);
cuda_managed_ptr<test_type> input_vector2(numElements);
cuda_managed_ptr<test_type> output_vector(numElements);

boost::random::uniform_int_distribution<test_type> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
for (std::size_t i = 0; i < numElements; ++i)
{
input_vector[i] = dist(rng);
input_vector2[i] = dist(rng);
}

int threadsPerBlock = 256;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

watch w;

cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements);
cudaDeviceSynchronize();

std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;

err = cudaGetLastError();
if (err != cudaSuccess)
{
std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
return EXIT_FAILURE;
}

std::vector<test_type> results;
results.reserve(numElements);
w.reset();
for (int i = 0; i < numElements; ++i)
{
results.push_back(boost::int128::add_sat(input_vector[i], input_vector2[i]));
}
double t = w.elapsed();

int fail_count = 0;
for (int i = 0; i < numElements; ++i)
{
if (output_vector[i] != results[i])
{
if (fail_count < 5)
{
std::cerr << "Result verification failed at element " << i << std::endl;
std::cerr << " input1 high: " << input_vector[i].high << " low: " << input_vector[i].low << std::endl;
std::cerr << " input2 high: " << input_vector2[i].high << " low: " << input_vector2[i].low << std::endl;
std::cerr << " GPU high: " << output_vector[i].high << " low: " << output_vector[i].low << std::endl;
std::cerr << " CPU high: " << results[i].high << " low: " << results[i].low << std::endl;
}
++fail_count;
}
}
if (fail_count > 0)
{
std::cerr << "Total failures: " << fail_count << " out of " << numElements << std::endl;
return EXIT_FAILURE;
}

std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";

return 0;
}
Loading
Loading