Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ jobs:
run: |
cd ../boost-root
mkdir __build__ && cd __build__
cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=86 -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-12.8 ..
cmake -DBOOST_INCLUDE_LIBRARIES=$LIBRARY -DBUILD_TESTING=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DBOOST_MATH_ENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75;86" -DCMAKE_CUDA_STANDARD=17 ..
- name: Build tests
run: |
cd ../boost-root/__build__
Expand Down
2 changes: 1 addition & 1 deletion include/boost/math/special_functions/lanczos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2751,7 +2751,7 @@ struct lanczos
} // namespace math
} // namespace boost

#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
#if !defined(_CRAYC) && !defined(BOOST_MATH_ENABLE_CUDA) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
#if ((defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)) && !defined(_MANAGED) && !defined(BOOST_MATH_HAS_GPU_SUPPORT)
#include <boost/math/special_functions/detail/lanczos_sse2.hpp>
#endif
Expand Down
2 changes: 1 addition & 1 deletion include/boost/math/special_functions/next.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include <cfloat>


#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
#if !defined(_CRAYC) && !defined(BOOST_MATH_ENABLE_CUDA) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__)
#include "xmmintrin.h"
#define BOOST_MATH_CHECK_SSE2
Expand Down
4 changes: 2 additions & 2 deletions include/boost/math/special_functions/sign.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#pragma once
#endif

#ifndef __CUDACC_RTC__
#ifndef BOOST_MATH_HAS_NVRTC

#include <boost/math/tools/config.hpp>
#include <boost/math/special_functions/math_fwd.hpp>
Expand Down Expand Up @@ -234,7 +234,7 @@ BOOST_MATH_GPU_ENABLED T sign(T z)
} // namespace math
} // namespace boost

#endif // __CUDACC_RTC__
#endif // BOOST_MATH_HAS_NVRTC

#endif // BOOST_MATH_TOOLS_SIGN_HPP

Expand Down
8 changes: 4 additions & 4 deletions include/boost/math/tools/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#pragma once
#endif

#ifndef __CUDACC_RTC__
#if !(defined(__CUDACC_RTC__) && defined(BOOST_MATH_ENABLE_NVRTC))

#include <boost/math/tools/is_standalone.hpp>

Expand Down Expand Up @@ -168,7 +168,7 @@
# define BOOST_MATH_NOINLINE __declspec(noinline)
# elif defined(__GNUC__) && __GNUC__ > 3
// Clang also defines __GNUC__ (as 4)
# if defined(__CUDACC__)
# if defined(__CUDACC__) && defined(BOOST_MATH_ENABLE_CUDA)
// nvcc doesn't always parse __noinline__,
// see: https://svn.boost.org/trac/boost/ticket/9392
# define BOOST_MATH_NOINLINE __attribute__ ((noinline))
Expand Down Expand Up @@ -678,7 +678,7 @@ namespace boost{ namespace math{
// CUDA support:
//

#ifdef __CUDACC__
#if defined(__CUDACC__) && defined(BOOST_MATH_ENABLE_CUDA)

// We have to get our include order correct otherwise you get compilation failures
#include <cuda.h>
Expand Down Expand Up @@ -774,7 +774,7 @@ BOOST_MATH_GPU_ENABLED constexpr T gpu_safe_max(const T& a, const T& b) { return
# define BOOST_MATH_STATIC_LOCAL_VARIABLE
# else
# define BOOST_MATH_INLINE_CONSTEXPR constexpr
# define BOOST_MATH_STATIC constexpr
# define BOOST_MATH_STATIC static
# define BOOST_MATH_STATIC_LOCAL_VARIABLE static
# endif
#endif
Expand Down
4 changes: 2 additions & 2 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ if(HAVE_BOOST_TEST)

message(STATUS "Building boost.math with CUDA")

find_package(CUDA REQUIRED)
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)
set(CMAKE_CUDA_EXTENSIONS OFF)

enable_testing()

boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::throw_exception Boost::unit_test_framework ${CUDA_LIBRARIES} INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} )
boost_test_jamfile(FILE cuda_jamfile LINK_LIBRARIES Boost::math Boost::assert Boost::concept_check Boost::config Boost::core Boost::integer Boost::lexical_cast Boost::multiprecision Boost::predef Boost::random Boost::throw_exception Boost::unit_test_framework CUDA::cudart COMPILE_DEFINITIONS BOOST_MATH_ENABLE_CUDA=1 )

elseif (BOOST_MATH_ENABLE_NVRTC)

Expand Down
4 changes: 4 additions & 0 deletions test/cuda_jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ project : requirements
[ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
;

# Github Issues
run github_issue_1383.cu ;
run github_issue_1383_pt_2.cu ;

# Quad
run test_exp_sinh_quad_float.cu ;
run test_exp_sinh_quad_double.cu ;
Expand Down
120 changes: 120 additions & 0 deletions test/github_issue_1383.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024 - 2026.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#ifdef BOOST_MATH_ENABLE_CUDA
# undef BOOST_MATH_ENABLE_CUDA
#endif // BOOST_MATH_ENABLE_CUDA

// Purposefully pull in headers that caused errors in the linked issue
#include <boost/math/special_functions.hpp>
#include <boost/math/tools/config.hpp>
#include <boost/math/tools/numeric_limits.hpp>
#include <boost/math/constants/constants.hpp>

#include <iostream>
#include <iomanip>
#include <vector>
#include <cmath>
#include <random>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

#ifdef BOOST_MATH_ENABLE_CUDA
# error "We should not be enabling this ourselves"
#endif // BOOST_MATH_ENABLE_CUDA

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA kernel: computes cos() of each input element.
 *
 * One thread per element; threads whose global index falls outside the
 * valid range return immediately.
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    using std::cos;
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;

    if (idx >= numElements)
    {
        return;
    }

    out[idx] = cos(in[idx]);
}

/**
* Host main routine
*/
int main()
{
// Error code to check return values for CUDA calls
cudaError_t err = cudaSuccess;

// Print the vector length to be used, and compute its size
int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

// Allocate the managed input vector A
cuda_managed_ptr<float_type> input_vector(numElements);

// Allocate the managed output vector C
cuda_managed_ptr<float_type> output_vector(numElements);

// Initialize the input vectors
// Check some of our numeric_limits for viability
std::mt19937_64 rng {42};
std::uniform_real_distribution<float_type> dist(0, boost::math::constants::pi<float_type>());
static_assert(boost::math::numeric_limits<float_type>::is_specialized, "Should be since it's a double");
static_assert(boost::math::numeric_limits<float_type>::is_signed, "Should be since it's a double");

for (int i = 0; i < numElements; ++i)
{
input_vector[i] = dist(rng);
}

// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 256;
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

watch w;

cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
cudaDeviceSynchronize();

std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;

err = cudaGetLastError();

if (err != cudaSuccess)
{
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
return EXIT_FAILURE;
}

// Verify that the result vector is correct
std::vector<float_type> results;
results.reserve(numElements);
w.reset();
for(int i = 0; i < numElements; ++i)
results.push_back(std::cos(input_vector[i]));
double t = w.elapsed();
// check the results
for(int i = 0; i < numElements; ++i)
{
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
{
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
return EXIT_FAILURE;
}
}

std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";

return 0;
}
116 changes: 116 additions & 0 deletions test/github_issue_1383_pt_2.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024 - 2026.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// Purposefully pull in headers that caused errors in the linked issue
#include <boost/math/special_functions.hpp>
#include <boost/math/tools/config.hpp>
#include <boost/math/tools/numeric_limits.hpp>
#include <boost/math/constants/constants.hpp>

#include <iostream>
#include <iomanip>
#include <vector>
#include <cmath>
#include <random>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA kernel: computes cos() of each input element and traps if the
 * result exceeds numeric_limits<>::max() or the type reports as
 * unsigned (sanity checks on boost::math::numeric_limits in device code).
 */
__global__ void cuda_test(const float_type *in, float_type *out, int numElements)
{
    using std::cos;
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;

    if (idx >= numElements)
    {
        return;
    }

    out[idx] = cos(in[idx]);

    const bool exceeds_max = out[idx] > boost::math::numeric_limits<float_type>::max();
    const bool reports_unsigned = !boost::math::numeric_limits<float_type>::is_signed;
    if (exceeds_max || reports_unsigned)
    {
        __trap();
    }
}

/**
* Host main routine
*/
int main()
{
// Error code to check return values for CUDA calls
cudaError_t err = cudaSuccess;

// Print the vector length to be used, and compute its size
int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

// Allocate the managed input vector A
cuda_managed_ptr<float_type> input_vector(numElements);

// Allocate the managed output vector C
cuda_managed_ptr<float_type> output_vector(numElements);

// Initialize the input vectors
// Check some of our numeric_limits for viability
std::mt19937_64 rng {42};
std::uniform_real_distribution<float_type> dist(0, boost::math::constants::pi<float_type>());
static_assert(boost::math::numeric_limits<float_type>::is_specialized, "Should be since it's a double");
static_assert(boost::math::numeric_limits<float_type>::is_signed, "Should be since it's a double");

for (int i = 0; i < numElements; ++i)
{
input_vector[i] = dist(rng);
}

// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 256;
int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

watch w;

cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
cudaDeviceSynchronize();

std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;

err = cudaGetLastError();

if (err != cudaSuccess)
{
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
return EXIT_FAILURE;
}

// Verify that the result vector is correct
std::vector<float_type> results;
results.reserve(numElements);
w.reset();
for(int i = 0; i < numElements; ++i)
results.push_back(std::cos(input_vector[i]));
double t = w.elapsed();
// check the results
for(int i = 0; i < numElements; ++i)
{
if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
{
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
return EXIT_FAILURE;
}
}

std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";

return 0;
}
Loading