From 5d0f13dd39af1474616657293c54cb5ab92f0832 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 13:40:38 -0400 Subject: [PATCH 1/3] Add CUDA testing of functions --- test/cuda_jamfile | 15 ++++++ test/test_signed_add_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_signed_div_sat.cu | 96 ++++++++++++++++++++++++++++++++++ test/test_signed_gcd.cu | 93 ++++++++++++++++++++++++++++++++ test/test_signed_lcm.cu | 94 +++++++++++++++++++++++++++++++++ test/test_signed_midpoint.cu | 93 ++++++++++++++++++++++++++++++++ test/test_signed_mul_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_signed_sub_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_add_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_div_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_gcd.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_lcm.cu | 94 +++++++++++++++++++++++++++++++++ test/test_unsigned_midpoint.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_mul_sat.cu | 93 ++++++++++++++++++++++++++++++++ test/test_unsigned_sub_sat.cu | 93 ++++++++++++++++++++++++++++++++ 15 files changed, 1322 insertions(+) create mode 100644 test/test_signed_add_sat.cu create mode 100644 test/test_signed_div_sat.cu create mode 100644 test/test_signed_gcd.cu create mode 100644 test/test_signed_lcm.cu create mode 100644 test/test_signed_midpoint.cu create mode 100644 test/test_signed_mul_sat.cu create mode 100644 test/test_signed_sub_sat.cu create mode 100644 test/test_unsigned_add_sat.cu create mode 100644 test/test_unsigned_div_sat.cu create mode 100644 test/test_unsigned_gcd.cu create mode 100644 test/test_unsigned_lcm.cu create mode 100644 test/test_unsigned_midpoint.cu create mode 100644 test/test_unsigned_mul_sat.cu create mode 100644 test/test_unsigned_sub_sat.cu diff --git a/test/cuda_jamfile b/test/cuda_jamfile index cd2fcc13..2d41d916 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -61,3 +61,18 @@ run test_signed_ge.cu ; run test_unsigned_cstdlib_div.cu ; run test_signed_cstdlib_div.cu ; + +run test_unsigned_add_sat.cu ; +run test_signed_add_sat.cu ; +run test_unsigned_sub_sat.cu ; +run test_signed_sub_sat.cu ; +run test_unsigned_mul_sat.cu ; +run test_signed_mul_sat.cu ; +run test_unsigned_div_sat.cu ; +run test_signed_div_sat.cu ; +run test_unsigned_gcd.cu ; +run test_signed_gcd.cu ; +run test_unsigned_lcm.cu ; +run test_signed_lcm.cu ; +run test_unsigned_midpoint.cu ; +run test_signed_midpoint.cu ; diff --git a/test/test_signed_add_sat.cu b/test/test_signed_add_sat.cu new file mode 100644 index 00000000..a86457f9 --- /dev/null +++ b/test/test_signed_add_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::add_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::add_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_div_sat.cu b/test/test_signed_div_sat.cu new file mode 100644 index 00000000..804d4dc9 --- /dev/null +++ b/test/test_signed_div_sat.cu @@ -0,0 +1,96 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::div_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + do + { + input_vector2[i] = dist(rng); + } while (input_vector2[i] == 0); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::div_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_gcd.cu b/test/test_signed_gcd.cu new file mode 100644 index 00000000..7d5c8434 --- /dev/null +++ b/test/test_signed_gcd.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::gcd(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::gcd(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_lcm.cu b/test/test_signed_lcm.cu new file mode 100644 index 00000000..bafe559d --- /dev/null +++ b/test/test_signed_lcm.cu @@ -0,0 +1,94 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::lcm(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + // Use smaller values to avoid overflow in lcm computation + boost::random::uniform_int_distribution dist {test_type{-1, UINT64_MAX}, test_type{0, UINT64_MAX}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::lcm(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_midpoint.cu b/test/test_signed_midpoint.cu new file mode 100644 index 00000000..803ba974 --- /dev/null +++ b/test/test_signed_midpoint.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::midpoint(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::midpoint(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_mul_sat.cu b/test/test_signed_mul_sat.cu new file mode 100644 index 00000000..569e583c --- /dev/null +++ b/test/test_signed_mul_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::mul_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::mul_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_signed_sub_sat.cu b/test/test_signed_sub_sat.cu new file mode 100644 index 00000000..7dd40f30 --- /dev/null +++ b/test/test_signed_sub_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::int128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::sub_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::sub_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_add_sat.cu b/test/test_unsigned_add_sat.cu new file mode 100644 index 00000000..3cfc0317 --- /dev/null +++ b/test/test_unsigned_add_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::add_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::add_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_div_sat.cu b/test/test_unsigned_div_sat.cu new file mode 100644 index 00000000..9f76b869 --- /dev/null +++ b/test/test_unsigned_div_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::div_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{1U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::div_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_gcd.cu b/test/test_unsigned_gcd.cu new file mode 100644 index 00000000..f23abe48 --- /dev/null +++ b/test/test_unsigned_gcd.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::gcd(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::gcd(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_lcm.cu b/test/test_unsigned_lcm.cu new file mode 100644 index 00000000..d586d58b --- /dev/null +++ b/test/test_unsigned_lcm.cu @@ -0,0 +1,94 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::lcm(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + // Use smaller values to avoid overflow in lcm computation + boost::random::uniform_int_distribution dist {test_type{0U}, test_type{0U, UINT64_MAX}}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::lcm(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_midpoint.cu b/test/test_unsigned_midpoint.cu new file mode 100644 index 00000000..e695b5ff --- /dev/null +++ b/test/test_unsigned_midpoint.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::midpoint(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::midpoint(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_mul_sat.cu b/test/test_unsigned_mul_sat.cu new file mode 100644 index 00000000..228ef806 --- /dev/null +++ b/test/test_unsigned_mul_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::mul_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::mul_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_unsigned_sub_sat.cu b/test/test_unsigned_sub_sat.cu new file mode 100644 index 00000000..73bf36d7 --- /dev/null +++ b/test/test_unsigned_sub_sat.cu @@ -0,0 +1,93 @@ +// Copyright Matt Borland 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#define BOOST_INT128_ALLOW_SIGN_CONVERSION + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#include + +using test_type = boost::int128::uint128_t; + +__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = boost::int128::sub_sat(in[i], in2[i]); + } +} + +int main(void) +{ + std::mt19937_64 rng {42}; + + cudaError_t err = cudaSuccess; + + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + cuda_managed_ptr input_vector(numElements); + cuda_managed_ptr input_vector2(numElements); + cuda_managed_ptr output_vector(numElements); + + boost::random::uniform_int_distribution dist {test_type{0U}, (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + input_vector2[i] = dist(rng); + } + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + std::vector results; + results.reserve(numElements); + w.reset(); + for (int i = 0; i < numElements; ++i) + { + results.push_back(boost::int128::sub_sat(input_vector[i], input_vector2[i])); + } + double t = w.elapsed(); + + for (int i = 0; i < numElements; ++i) + { + if (output_vector[i] != results[i]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} From 1a7001183d83aa414a07b248626b2eded172126d Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Wed, 11 Mar 2026 13:56:10 -0400 Subject: [PATCH 2/3] Improve diagnostics --- test/test_signed_add_sat.cu | 17 +++++++++++++++-- test/test_signed_midpoint.cu | 17 +++++++++++++++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/test/test_signed_add_sat.cu b/test/test_signed_add_sat.cu index a86457f9..45b45116 100644 --- a/test/test_signed_add_sat.cu +++ b/test/test_signed_add_sat.cu @@ -77,14 +77,27 @@ int main(void) } double t = w.elapsed(); + int fail_count = 0; for (int i = 0; i < numElements; ++i) { if (output_vector[i] != results[i]) { - std::cerr << "Result verification failed at element " << i << "!" << std::endl; - return EXIT_FAILURE; + if (fail_count < 5) + { + std::cerr << "Result verification failed at element " << i << std::endl; + std::cerr << " input1 high: " << input_vector[i].high << " low: " << input_vector[i].low << std::endl; + std::cerr << " input2 high: " << input_vector2[i].high << " low: " << input_vector2[i].low << std::endl; + std::cerr << " GPU high: " << output_vector[i].high << " low: " << output_vector[i].low << std::endl; + std::cerr << " CPU high: " << results[i].high << " low: " << results[i].low << std::endl; + } + ++fail_count; } } + if (fail_count > 0) + { + std::cerr << "Total failures: " << fail_count << " out of " << numElements << std::endl; + return EXIT_FAILURE; + } std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; std::cout << "Done\n"; diff --git a/test/test_signed_midpoint.cu b/test/test_signed_midpoint.cu index 803ba974..5ee28d71 100644 --- a/test/test_signed_midpoint.cu +++ b/test/test_signed_midpoint.cu @@ -77,14 +77,27 @@ int main(void) } double t = w.elapsed(); + int fail_count = 0; for (int i = 0; i < numElements; ++i) { if (output_vector[i] != results[i]) { - std::cerr << "Result verification failed at element " << i << "!" << std::endl; - return EXIT_FAILURE; + if (fail_count < 5) + { + std::cerr << "Result verification failed at element " << i << std::endl; + std::cerr << " input1 high: " << input_vector[i].high << " low: " << input_vector[i].low << std::endl; + std::cerr << " input2 high: " << input_vector2[i].high << " low: " << input_vector2[i].low << std::endl; + std::cerr << " GPU high: " << output_vector[i].high << " low: " << output_vector[i].low << std::endl; + std::cerr << " CPU high: " << results[i].high << " low: " << results[i].low << std::endl; + } + ++fail_count; } } + if (fail_count > 0) + { + std::cerr << "Total failures: " << fail_count << " out of " << numElements << std::endl; + return EXIT_FAILURE; + } std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; std::cout << "Done\n"; From b92d7ec6ba812c84a266836eafec13eb539c9e6f Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 12 Mar 2026 14:25:06 -0400 Subject: [PATCH 3/3] Implement conversion operators between types --- include/boost/int128/detail/conversions.hpp | 14 ++++++++++++++ include/boost/int128/detail/int128_imp.hpp | 1 + include/boost/int128/detail/uint128_imp.hpp | 1 + 3 files changed, 16 insertions(+) diff --git a/include/boost/int128/detail/conversions.hpp b/include/boost/int128/detail/conversions.hpp index a549db03..f471d570 100644 --- a/include/boost/int128/detail/conversions.hpp +++ b/include/boost/int128/detail/conversions.hpp @@ -38,6 +38,20 @@ BOOST_INT128_HOST_DEVICE constexpr uint128_t::uint128_t(const int128_t& v) noexc #endif // BOOST_INT128_ENDIAN_LITTLE_BYTE +//===================================== +// Conversion Operators +//===================================== + +BOOST_INT128_HOST_DEVICE constexpr int128_t::operator uint128_t() const noexcept +{ + return uint128_t{static_cast(this->high), static_cast(this->low)}; +} + +BOOST_INT128_HOST_DEVICE constexpr uint128_t::operator int128_t() const noexcept +{ + return int128_t{static_cast(this->high), static_cast(this->low)}; +} + //===================================== // Comparison Operators //===================================== diff --git a/include/boost/int128/detail/int128_imp.hpp b/include/boost/int128/detail/int128_imp.hpp index d2bdfa62..4e5fcc11 100644 --- a/include/boost/int128/detail/int128_imp.hpp +++ b/include/boost/int128/detail/int128_imp.hpp @@ -57,6 +57,7 @@ int128_t // Requires a conversion file to be implemented BOOST_INT128_HOST_DEVICE explicit constexpr int128_t(const uint128_t& v) noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator uint128_t() const noexcept; // Construct from integral types #if BOOST_INT128_ENDIAN_LITTLE_BYTE diff --git a/include/boost/int128/detail/uint128_imp.hpp b/include/boost/int128/detail/uint128_imp.hpp index 26aa9370..fdd0b309 100644 --- a/include/boost/int128/detail/uint128_imp.hpp +++ b/include/boost/int128/detail/uint128_imp.hpp @@ -58,6 +58,7 @@ uint128_t // Requires a conversion file to be implemented BOOST_INT128_HOST_DEVICE explicit constexpr uint128_t(const int128_t& v) noexcept; + BOOST_INT128_HOST_DEVICE explicit constexpr operator int128_t() const noexcept; // Construct from integral types #if BOOST_INT128_ENDIAN_LITTLE_BYTE