diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 578fb5e527..5e91faa944 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -820,7 +820,7 @@ jobs: run: | cd ../boost-root mkdir __build__ && cd __build__ - cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=86 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 .. + cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCMAKE_CUDA_STANDARD=17 .. - name: Build tests run: | cd ../boost-root/__build__ diff --git a/include/boost/math/special_functions/lanczos.hpp b/include/boost/math/special_functions/lanczos.hpp index 0ec24bddbf..409e14c5e1 100644 --- a/include/boost/math/special_functions/lanczos.hpp +++ b/include/boost/math/special_functions/lanczos.hpp @@ -2751,7 +2751,7 @@ struct lanczos } // namespace math } // namespace boost -#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) +#if !defined(_CRAYC) && !defined(BOOST_MATH_ENABLE_CUDA) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) #if ((defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)) && !defined(_MANAGED) && !defined(BOOST_MATH_HAS_GPU_SUPPORT) #include #endif diff --git a/include/boost/math/special_functions/next.hpp b/include/boost/math/special_functions/next.hpp index 74c34f06ad..f73cb2f6a5 100644 --- a/include/boost/math/special_functions/next.hpp +++ b/include/boost/math/special_functions/next.hpp @@ -25,7 +25,7 @@ #include -#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) +#if !defined(_CRAYC) && !defined(BOOST_MATH_ENABLE_CUDA) && 
(!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) #include "xmmintrin.h" #define BOOST_MATH_CHECK_SSE2 diff --git a/include/boost/math/special_functions/sign.hpp b/include/boost/math/special_functions/sign.hpp index 4f76522654..5d2dfc2e23 100644 --- a/include/boost/math/special_functions/sign.hpp +++ b/include/boost/math/special_functions/sign.hpp @@ -14,7 +14,7 @@ #pragma once #endif -#ifndef __CUDACC_RTC__ +#ifndef BOOST_MATH_HAS_NVRTC #include #include @@ -234,7 +234,7 @@ BOOST_MATH_GPU_ENABLED T sign(T z) } // namespace math } // namespace boost -#endif // __CUDACC_RTC__ +#endif // BOOST_MATH_HAS_NVRTC #endif // BOOST_MATH_TOOLS_SIGN_HPP diff --git a/include/boost/math/tools/config.hpp b/include/boost/math/tools/config.hpp index 4829ebe36e..40346c9b0f 100644 --- a/include/boost/math/tools/config.hpp +++ b/include/boost/math/tools/config.hpp @@ -11,7 +11,7 @@ #pragma once #endif -#ifndef __CUDACC_RTC__ +#if !(defined(__CUDACC_RTC__) && defined(BOOST_MATH_ENABLE_NVRTC)) #include @@ -168,7 +168,7 @@ # define BOOST_MATH_NOINLINE __declspec(noinline) # elif defined(__GNUC__) && __GNUC__ > 3 // Clang also defines __GNUC__ (as 4) -# if defined(__CUDACC__) +# if defined(__CUDACC__) && defined(BOOST_MATH_ENABLE_CUDA) // nvcc doesn't always parse __noinline__, // see: https://svn.boost.org/trac/boost/ticket/9392 # define BOOST_MATH_NOINLINE __attribute__ ((noinline)) @@ -678,7 +678,7 @@ namespace boost{ namespace math{ // CUDA support: // -#ifdef __CUDACC__ +#if defined(__CUDACC__) && defined(BOOST_MATH_ENABLE_CUDA) // We have to get our include order correct otherwise you get compilation failures #include @@ -774,7 +774,7 @@ BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_max(const T& a, const T& b) { return # define BOOST_MATH_STATIC_LOCAL_VARIABLE # else # define BOOST_MATH_INLINE_CONSTEXPR constexpr -# define BOOST_MATH_STATIC constexpr +# define BOOST_MATH_STATIC static 
# define BOOST_MATH_STATIC_LOCAL_VARIABLE static # endif #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4715cf379b..9d673adc7d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -10,13 +10,13 @@ if(HAVE_BOOST_TEST) message(STATUS "Building boost.math with CUDA") - find_package(CUDA REQUIRED) enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) set(CMAKE_CUDA_EXTENSIONS OFF) enable_testing() - boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::throw_exception Boost::unit_test_framework ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} ) + boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::throw_exception Boost::unit_test_framework CUDA::cudart COMPILE_DEFINITIONS BOOST_MATH_ENABLE_CUDA=1 ) elseif (BOOST_MATH_ENABLE_NVRTC) diff --git a/test/cuda_jamfile b/test/cuda_jamfile index e4d7505da8..f2f2c9aa82 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -9,6 +9,10 @@ project : requirements [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ] ; +# Github Issues +run github_issue_1383.cu ; +run github_issue_1383_pt_2.cu ; + # Quad run test_exp_sinh_quad_float.cu ; run test_exp_sinh_quad_double.cu ; diff --git a/test/github_issue_1383.cu b/test/github_issue_1383.cu new file mode 100644 index 0000000000..119f5d1d5e --- /dev/null +++ b/test/github_issue_1383.cu @@ -0,0 +1,120 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifdef BOOST_MATH_ENABLE_CUDA +# undef BOOST_MATH_ENABLE_CUDA +#endif // BOOST_MATH_ENABLE_CUDA + +// Purposefully pull in headers that caused errors in the linked issue +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +#ifdef BOOST_MATH_ENABLE_CUDA +# error "We should not be enabling this ourselves" +#endif // BOOST_MATH_ENABLE_CUDA + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + const int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cos(in[i]); + } +} + +/** + * Host main routine + */ +int main() +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + // Check some of our numeric_limits for viability + std::mt19937_64 rng {42}; + std::uniform_real_distribution<float_type> dist(0, boost::math::constants::pi<float_type>()); + static_assert(boost::math::numeric_limits<float_type>::is_specialized, "Should be since it's a double"); + static_assert(boost::math::numeric_limits<float_type>::is_signed, "Should be since it's a double"); + + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout
<< "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(std::cos(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/github_issue_1383_pt_2.cu b/test/github_issue_1383_pt_2.cu new file mode 100644 index 0000000000..8e746abfce --- /dev/null +++ b/test/github_issue_1383_pt_2.cu @@ -0,0 +1,116 @@ +// Copyright John Maddock 2016. +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0.
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// Purposefully pull in headers that caused errors in the linked issue +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include <cuda_runtime.h> + +typedef double float_type; + +/** + * CUDA Kernel Device code + * + */ +__global__ void cuda_test(const float_type *in, float_type *out, int numElements) +{ + using std::cos; + const int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + out[i] = cos(in[i]); + if (out[i] > boost::math::numeric_limits<float_type>::max() || !boost::math::numeric_limits<float_type>::is_signed) + { + __trap(); + } + } +} + +/** + * Host main routine + */ +int main() +{ + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + cuda_managed_ptr<float_type> input_vector(numElements); + + // Allocate the managed output vector C + cuda_managed_ptr<float_type> output_vector(numElements); + + // Initialize the input vectors + // Check some of our numeric_limits for viability + std::mt19937_64 rng {42}; + std::uniform_real_distribution<float_type> dist(0, boost::math::constants::pi<float_type>()); + static_assert(boost::math::numeric_limits<float_type>::is_specialized, "Should be since it's a double"); + static_assert(boost::math::numeric_limits<float_type>::is_signed, "Should be since it's a double"); + + for (int i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" <<
std::endl; + + watch w; + + cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector<float_type> results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + results.push_back(std::cos(input_vector[i])); + double t = w.elapsed(); + // check the results + for(int i = 0; i < numElements; ++i) + { + if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +}