From fc9ac022c871540abb29da49c4895a6035aa5481 Mon Sep 17 00:00:00 2001 From: Tim Moon Date: Sat, 4 Apr 2026 01:53:59 +0000 Subject: [PATCH 1/6] [NVRTC] Warn on CUDA version mismatch after compilation failure When NVRTC kernel compilation fails, detect whether the linked NVRTC library and the CUDA headers used for compilation are from different CUDA versions, and if so emit an actionable note to stderr pointing the user toward NVTE_CUDA_INCLUDE_DIR / CUDA_HOME / LD_LIBRARY_PATH. The header version is obtained by compiling a tiny probe program that embeds CUDA_VERSION (from cuda.h) into a static_assert failure message, so the macro is resolved by the actual preprocessor rather than by parsing header text. All probe failures are silent; the check is purely informational and never causes a premature error. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Tim Moon --- transformer_engine/common/util/rtc.cpp | 73 ++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/transformer_engine/common/util/rtc.cpp b/transformer_engine/common/util/rtc.cpp index 7925fdceea..ab92de6913 100644 --- a/transformer_engine/common/util/rtc.cpp +++ b/transformer_engine/common/util/rtc.cpp @@ -25,6 +25,61 @@ namespace { #include "string_code_util_math_h.h" #include "string_code_utils_cuh.h" +/*! \brief CUDA version reported by the headers at the configured include path + * + * Compiles a tiny probe through NVRTC using the same include path as real + * RTC kernels. Including cuda.h ensures CUDA_VERSION is defined, and + * embedding it in a static_assert failure message lets us extract the + * resolved value from the diagnostic log without parsing header text. + * + * \return CUDA version encoded as major * 1000 + minor * 10 + * (e.g. 12040 for CUDA 12.4), or -1 if it could not be determined. 
+ */ +int cuda_header_version() { + const std::string &include_dir = cuda::include_directory(false); + if (include_dir.empty()) { + return -1; + } + + static const char probe_source[] = + "#include \n" + "#define NVTE_STRINGIFY2(x) #x\n" + "#define NVTE_STRINGIFY(x) NVTE_STRINGIFY2(x)\n" + "static_assert(false, \"NVTE_CUDA_VERSION=\" NVTE_STRINGIFY(CUDA_VERSION));\n"; + + nvrtcProgram probe = nullptr; + if (nvrtcCreateProgram(&probe, probe_source, "nvte_version_probe.cu", 0, nullptr, nullptr) != + NVRTC_SUCCESS) { + return -1; + } + + const std::string include_opt = concat_strings("-I", include_dir); + const char *opts[] = {include_opt.c_str()}; + nvrtcCompileProgram(probe, 1, opts); // expected to fail + + std::string log; + size_t log_size = 0; + if (nvrtcGetProgramLogSize(probe, &log_size) == NVRTC_SUCCESS && log_size > 0) { + log.resize(log_size); + if (nvrtcGetProgramLog(probe, log.data()) != NVRTC_SUCCESS) { + log.clear(); + } + } + nvrtcDestroyProgram(&probe); + + // Parse the integer that follows our embedded marker + const std::string marker = "NVTE_CUDA_VERSION="; + const auto marker_pos = log.find(marker); + if (marker_pos == std::string::npos) { + return -1; + } + try { + return std::stoi(log.substr(marker_pos + marker.size())); + } catch (...) { + return -1; + } +} + /*! \brief Latest compute capability that NVRTC supports * * \return Compute capability as int. 
Last digit is minor revision, @@ -184,6 +239,24 @@ void KernelManager::compile(const std::string& kernel_label, const std::string& NVTE_CHECK_NVRTC(nvrtcGetProgramLog(program, &log[log_offset])); log.back() = '\n'; std::cerr << log; + // Warn if a NVRTC/header version mismatch may explain the failure + int nvrtc_major = 0, nvrtc_minor = 0; + const int header_ver = cuda_header_version(); + if (nvrtcGetVersion(&nvrtc_major, &nvrtc_minor) == NVRTC_SUCCESS && header_ver >= 0) { + const int header_major = header_ver / 1000; + const int header_minor = (header_ver % 1000) / 10; + if (nvrtc_major != header_major || nvrtc_minor != header_minor) { + std::cerr << concat_strings( + "Note: NVRTC library version (", nvrtc_major, ".", nvrtc_minor, + ") does not match CUDA headers version (", header_major, ".", header_minor, + ") found in \"", cuda::include_directory(false), "\". " + "This version mismatch may have caused the above compilation failure. " + "Consider setting NVTE_CUDA_INCLUDE_DIR to a path with CUDA ", nvrtc_major, ".", + nvrtc_minor, + " headers, " + "or adjusting CUDA_HOME or LD_LIBRARY_PATH to link the correct NVRTC library.\n"); + } + } NVTE_CHECK_NVRTC(compile_result); } From 19846692113e445a2f3d554532d672288cd45274 Mon Sep 17 00:00:00 2001 From: Tim Moon Date: Mon, 6 Apr 2026 23:24:47 +0000 Subject: [PATCH 2/6] Move CUDA header version check to CUDA runtime utils Still buggy, include_directory_version returns CUDA runtime version instead of header version. 
Signed-off-by: Tim Moon --- .../common/util/cuda_runtime.cpp | 68 ++++++++++++ transformer_engine/common/util/cuda_runtime.h | 15 +++ transformer_engine/common/util/rtc.cpp | 101 +++++------------- 3 files changed, 109 insertions(+), 75 deletions(-) diff --git a/transformer_engine/common/util/cuda_runtime.cpp b/transformer_engine/common/util/cuda_runtime.cpp index 4b43940a51..ce4c0c5b56 100644 --- a/transformer_engine/common/util/cuda_runtime.cpp +++ b/transformer_engine/common/util/cuda_runtime.cpp @@ -7,6 +7,7 @@ #include "../util/cuda_runtime.h" #include +#include #include #include @@ -202,6 +203,73 @@ const std::string &include_directory(bool required) { return path; } +int include_directory_version(bool required) { + // Header path + const auto &include_dir = cuda::include_directory(false); + if (include_dir.empty()) { + if (required) { + NVTE_ERROR("Could not detect version of CUDA Toolkit headers " + "(CUDA Toolkit headers not found)."); + } + return -1; + } + + // Program to probe CUDA version + static const char probe_source[] = R"!( +#include +#define STRINGIFY(s) #s +#define XSTRINGIFY(s) STRINGIFY(s) +static_assert(false, + " transformer_engine::cuda::include_directory_version " + "CUDART_VERSION=" XSTRINGIFY(CUDART_VERSION) " "); +)!"; + nvrtcProgram probe = nullptr; + const auto create_program_status = nvrtcCreateProgram(&probe, probe_source, "version_probe.cu", 0, nullptr, nullptr); + if (create_program_status != NVRTC_SUCCESS) { + if (required) { + NVTE_CHECK_NVRTC(create_program_status); + } + return -1; + } + + // Compile program and extract logs + const std::string include_opt = concat_strings("-I", include_dir); + const char *opts[] = {include_opt.c_str()}; + nvrtcCompileProgram(probe, 1, opts); // Expected to fail + std::string log; + size_t log_size = 0; + if (nvrtcGetProgramLogSize(probe, &log_size) == NVRTC_SUCCESS && log_size > 0) { + log.resize(log_size); + if (nvrtcGetProgramLog(probe, log.data()) != NVRTC_SUCCESS) { + log.clear(); + } + 
} + nvrtcDestroyProgram(&probe); + + // Determine version by parsing build logs + int version = -1; + do { + const std::string marker = " transformer_engine::cuda::include_directory_version CUDART_VERSION="; + const auto marker_pos = log.find(marker); + if (marker_pos == std::string::npos) { + break; + } + try { + version = std::stoi(log.substr(marker_pos + marker.size())); + } catch (const std::invalid_argument &) { + break; + } catch (const std::out_of_range &) { + break; + } + } while (false); + if (version < 0 && required) { + NVTE_ERROR("Could not detect version of CUDA Toolkit headers " + "(Error parsing build logs of probe program)."); + } + + return version; +} + int cudart_version() { auto get_version = []() -> int { int version; diff --git a/transformer_engine/common/util/cuda_runtime.h b/transformer_engine/common/util/cuda_runtime.h index f0aa239622..0f35594001 100644 --- a/transformer_engine/common/util/cuda_runtime.h +++ b/transformer_engine/common/util/cuda_runtime.h @@ -67,6 +67,21 @@ bool supports_multicast(int device_id = -1); */ const std::string &include_directory(bool required = false); +/* \brief Version number of CUDA Toolkit headers + * + * The headers are accessed at run-time and their CUDA version may + * differ from compile-time and from the CUDA Runtime. The header path + * can be configured by setting NVTE_CUDA_INCLUDE_DIR in the + * environment (default is to search in common install paths). + * + * \param[in] required Whether to throw an exception if headers are not + * found or if version cannot be determined. + * + * \return CUDA version encoded as major * 1000 + minor * 10, or -1 if + * it could not be determined. + */ +int include_directory_version(bool required = false); + /* \brief CUDA Runtime version number at run-time * * Versions may differ between compile-time and run-time. 
diff --git a/transformer_engine/common/util/rtc.cpp b/transformer_engine/common/util/rtc.cpp index ab92de6913..8d45007418 100644 --- a/transformer_engine/common/util/rtc.cpp +++ b/transformer_engine/common/util/rtc.cpp @@ -12,6 +12,7 @@ #include "../common.h" #include "../util/cuda_driver.h" +#include "../util/cuda_runtime.h" #include "../util/string.h" #include "../util/system.h" @@ -25,61 +26,6 @@ namespace { #include "string_code_util_math_h.h" #include "string_code_utils_cuh.h" -/*! \brief CUDA version reported by the headers at the configured include path - * - * Compiles a tiny probe through NVRTC using the same include path as real - * RTC kernels. Including cuda.h ensures CUDA_VERSION is defined, and - * embedding it in a static_assert failure message lets us extract the - * resolved value from the diagnostic log without parsing header text. - * - * \return CUDA version encoded as major * 1000 + minor * 10 - * (e.g. 12040 for CUDA 12.4), or -1 if it could not be determined. - */ -int cuda_header_version() { - const std::string &include_dir = cuda::include_directory(false); - if (include_dir.empty()) { - return -1; - } - - static const char probe_source[] = - "#include \n" - "#define NVTE_STRINGIFY2(x) #x\n" - "#define NVTE_STRINGIFY(x) NVTE_STRINGIFY2(x)\n" - "static_assert(false, \"NVTE_CUDA_VERSION=\" NVTE_STRINGIFY(CUDA_VERSION));\n"; - - nvrtcProgram probe = nullptr; - if (nvrtcCreateProgram(&probe, probe_source, "nvte_version_probe.cu", 0, nullptr, nullptr) != - NVRTC_SUCCESS) { - return -1; - } - - const std::string include_opt = concat_strings("-I", include_dir); - const char *opts[] = {include_opt.c_str()}; - nvrtcCompileProgram(probe, 1, opts); // expected to fail - - std::string log; - size_t log_size = 0; - if (nvrtcGetProgramLogSize(probe, &log_size) == NVRTC_SUCCESS && log_size > 0) { - log.resize(log_size); - if (nvrtcGetProgramLog(probe, log.data()) != NVRTC_SUCCESS) { - log.clear(); - } - } - nvrtcDestroyProgram(&probe); - - // Parse the 
integer that follows our embedded marker - const std::string marker = "NVTE_CUDA_VERSION="; - const auto marker_pos = log.find(marker); - if (marker_pos == std::string::npos) { - return -1; - } - try { - return std::stoi(log.substr(marker_pos + marker.size())); - } catch (...) { - return -1; - } -} - /*! \brief Latest compute capability that NVRTC supports * * \return Compute capability as int. Last digit is minor revision, @@ -230,33 +176,38 @@ void KernelManager::compile(const std::string& kernel_label, const std::string& const nvrtcResult compile_result = nvrtcCompileProgram(program, opts_ptrs.size(), opts_ptrs.data()); if (compile_result != NVRTC_SUCCESS) { - // Display log if compilation failed - std::string log = concat_strings("NVRTC compilation log for ", filename, ":\n"); + std::string log; + + // Check CUDA versions + const int build_version = CUDA_VERSION; + int nvrtc_version = -1; + int nvrtc_version_major = 0, nvrtc_version_minor = 0; + if (nvrtcVersion(&nvrtc_version_major, &nvrtc_version_minor) == NVRTC_SUCCESS) { + nvrtc_version = nvrtc_version_major * 1000 + nvrtc_version_minor * 10; + } + const int header_version = cuda::include_directory_version(); + log += concat_strings("Compile-time CUDA version: ", build_version, "\n", + "Run-time NVRTC version: ", nvrtc_version, "\n", + "Run-time CUDA headers version: ", header_version, "\n"); + if (nvrtc_version != header_version) { + log += concat_strings( + "\nWarning: CUDA versions do not match between NVRTC and CUDA headers (", + cuda::include_directory(), "). 
" + "Consider changing the CUDA header search path (by setting NVTE_CUDA_INCLUDE_DIR) " + "or the linked CUDA Runtime (by setting CUDA_HOME or LD_LIBRARY_PATH).\n\n"); + } + + // Get build log + log += concat_strings("NVRTC compilation log for ", filename, ":\n"); const size_t log_offset = log.size(); size_t log_size; NVTE_CHECK_NVRTC(nvrtcGetProgramLogSize(program, &log_size)); log.resize(log_offset + log_size); NVTE_CHECK_NVRTC(nvrtcGetProgramLog(program, &log[log_offset])); log.back() = '\n'; + + // Display log and throw error std::cerr << log; - // Warn if a NVRTC/header version mismatch may explain the failure - int nvrtc_major = 0, nvrtc_minor = 0; - const int header_ver = cuda_header_version(); - if (nvrtcGetVersion(&nvrtc_major, &nvrtc_minor) == NVRTC_SUCCESS && header_ver >= 0) { - const int header_major = header_ver / 1000; - const int header_minor = (header_ver % 1000) / 10; - if (nvrtc_major != header_major || nvrtc_minor != header_minor) { - std::cerr << concat_strings( - "Note: NVRTC library version (", nvrtc_major, ".", nvrtc_minor, - ") does not match CUDA headers version (", header_major, ".", header_minor, - ") found in \"", cuda::include_directory(false), "\". " - "This version mismatch may have caused the above compilation failure. " - "Consider setting NVTE_CUDA_INCLUDE_DIR to a path with CUDA ", nvrtc_major, ".", - nvrtc_minor, - " headers, " - "or adjusting CUDA_HOME or LD_LIBRARY_PATH to link the correct NVRTC library.\n"); - } - } NVTE_CHECK_NVRTC(compile_result); } From 9e73d48770b1a689f0db0515fdd2116b76d10aa1 Mon Sep 17 00:00:00 2001 From: Tim Moon Date: Tue, 7 Apr 2026 00:19:27 +0000 Subject: [PATCH 3/6] [NVRTC] Fix CUDA header version detection The NVRTC probe approach was broken: NVRTC pre-defines CUDART_VERSION to its own version before processing any includes, so the probe always returned the NVRTC version regardless of the headers on the include path. 
Fix by reading cuda_runtime_api.h as text and parsing the "#define CUDART_VERSION " line directly. This is immune to NVRTC's internal macro management, and the format has been stable across all CUDA versions. Also decode raw CUDA version integers to "major.minor" strings in the error message for readability. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Tim Moon --- .../common/util/cuda_runtime.cpp | 73 ++++++------------- transformer_engine/common/util/rtc.cpp | 14 +++- 2 files changed, 34 insertions(+), 53 deletions(-) diff --git a/transformer_engine/common/util/cuda_runtime.cpp b/transformer_engine/common/util/cuda_runtime.cpp index ce4c0c5b56..8c751408c1 100644 --- a/transformer_engine/common/util/cuda_runtime.cpp +++ b/transformer_engine/common/util/cuda_runtime.cpp @@ -7,9 +7,9 @@ #include "../util/cuda_runtime.h" #include -#include #include +#include #include #include "../common.h" @@ -214,60 +214,33 @@ int include_directory_version(bool required) { return -1; } - // Program to probe CUDA version - static const char probe_source[] = R"!( -#include -#define STRINGIFY(s) #s -#define XSTRINGIFY(s) STRINGIFY(s) -static_assert(false, - " transformer_engine::cuda::include_directory_version " - "CUDART_VERSION=" XSTRINGIFY(CUDART_VERSION) " "); -)!"; - nvrtcProgram probe = nullptr; - const auto create_program_status = nvrtcCreateProgram(&probe, probe_source, "version_probe.cu", 0, nullptr, nullptr); - if (create_program_status != NVRTC_SUCCESS) { - if (required) { - NVTE_CHECK_NVRTC(create_program_status); - } - return -1; - } - - // Compile program and extract logs - const std::string include_opt = concat_strings("-I", include_dir); - const char *opts[] = {include_opt.c_str()}; - nvrtcCompileProgram(probe, 1, opts); // Expected to fail - std::string log; - size_t log_size = 0; - if (nvrtcGetProgramLogSize(probe, &log_size) == NVRTC_SUCCESS && log_size > 0) { - log.resize(log_size); - if (nvrtcGetProgramLog(probe, log.data()) != NVRTC_SUCCESS) { - log.clear(); + 
// Parse CUDART_VERSION from cuda_runtime_api.h. + const auto header_path = std::filesystem::path(include_dir) / "cuda_runtime_api.h"; + std::ifstream header_file(header_path); + if (header_file.is_open()) { + const std::string define_prefix = "#define CUDART_VERSION "; + std::string line; + while (std::getline(header_file, line)) { + const auto pos = line.find(define_prefix); + if (pos == std::string::npos) { + continue; + } + try { + const int version = std::stoi(line.substr(pos + define_prefix.size())); + if (version > 0) { + return version; + } + } catch (...) { + continue; + } } } - nvrtcDestroyProgram(&probe); - // Determine version by parsing build logs - int version = -1; - do { - const std::string marker = " transformer_engine::cuda::include_directory_version CUDART_VERSION="; - const auto marker_pos = log.find(marker); - if (marker_pos == std::string::npos) { - break; - } - try { - version = std::stoi(log.substr(marker_pos + marker.size())); - } catch (const std::invalid_argument &) { - break; - } catch (const std::out_of_range &) { - break; - } - } while (false); - if (version < 0 && required) { + if (required) { NVTE_ERROR("Could not detect version of CUDA Toolkit headers " - "(Error parsing build logs of probe program)."); + "(Could not parse CUDART_VERSION from ", header_path.string(), ")."); } - - return version; + return -1; } int cudart_version() { diff --git a/transformer_engine/common/util/rtc.cpp b/transformer_engine/common/util/rtc.cpp index 8d45007418..31a8cd99ce 100644 --- a/transformer_engine/common/util/rtc.cpp +++ b/transformer_engine/common/util/rtc.cpp @@ -178,6 +178,14 @@ void KernelManager::compile(const std::string& kernel_label, const std::string& if (compile_result != NVRTC_SUCCESS) { std::string log; + // Decode CUDA version number to "major.minor" string + auto version_string = [](int v) -> std::string { + if (v < 0) { + return ""; + } + return concat_strings(v / 1000, ".", (v % 1000) / 10); + }; + // Check CUDA versions const int 
build_version = CUDA_VERSION; int nvrtc_version = -1; @@ -186,9 +194,9 @@ void KernelManager::compile(const std::string& kernel_label, const std::string& nvrtc_version = nvrtc_version_major * 1000 + nvrtc_version_minor * 10; } const int header_version = cuda::include_directory_version(); - log += concat_strings("Compile-time CUDA version: ", build_version, "\n", - "Run-time NVRTC version: ", nvrtc_version, "\n", - "Run-time CUDA headers version: ", header_version, "\n"); + log += concat_strings("Compile-time CUDA version: ", version_string(build_version), "\n", + "Run-time NVRTC version: ", version_string(nvrtc_version), "\n", + "Run-time CUDA headers version: ", version_string(header_version), "\n"); if (nvrtc_version != header_version) { log += concat_strings( "\nWarning: CUDA versions do not match between NVRTC and CUDA headers (", From 4af8ac5860270d77d5e9853e691637c631b1a901 Mon Sep 17 00:00:00 2001 From: Tim Moon Date: Tue, 7 Apr 2026 00:35:45 +0000 Subject: [PATCH 4/6] [NVRTC] Add unit tests for CUDA header detection Test that the CUDA include directory is found and that its version matches the compile-time CUDART_VERSION. Also export transformer_engine::cuda::* symbols and tighten the rtc export pattern in the version script. 
Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Tim Moon --- tests/cpp/util/test_nvrtc.cpp | 9 +++++++++ transformer_engine/common/libtransformer_engine.version | 8 ++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/cpp/util/test_nvrtc.cpp b/tests/cpp/util/test_nvrtc.cpp index d41084449e..dab945ecf0 100644 --- a/tests/cpp/util/test_nvrtc.cpp +++ b/tests/cpp/util/test_nvrtc.cpp @@ -9,10 +9,19 @@ #include +#include "util/cuda_runtime.h" #include "util/rtc.h" using namespace transformer_engine; +TEST(UtilTest, CUDAHeaders) { + if (!rtc::is_enabled()) { + GTEST_SKIP() << "NVRTC not enabled, skipping tests"; + } + EXPECT_FALSE(cuda::include_directory().empty()); + EXPECT_EQ(cuda::include_directory_version(), CUDART_VERSION); +} + TEST(UtilTest, NVRTC) { if (!rtc::is_enabled()) { GTEST_SKIP() << "NVRTC not enabled, skipping tests"; diff --git a/transformer_engine/common/libtransformer_engine.version b/transformer_engine/common/libtransformer_engine.version index 706c237ccc..4eb24ec62a 100644 --- a/transformer_engine/common/libtransformer_engine.version +++ b/transformer_engine/common/libtransformer_engine.version @@ -2,15 +2,11 @@ global: extern "C++" { nvte_*; - transformer_engine::cuda::sm_count*; - transformer_engine::cuda::sm_arch*; - transformer_engine::cuda::supports_multicast*; - transformer_engine::cuda::stream_priority_range*; - transformer_engine::cuda::current_device*; + transformer_engine::cuda::*; transformer_engine::cuda_driver::get_symbol*; transformer_engine::cuda_driver::ensure_context_exists*; transformer_engine::ubuf_built_with_mpi*; - *transformer_engine::rtc*; + *transformer_engine::rtc::*; transformer_engine::nvte_cudnn_handle_init*; transformer_engine::nvte_cublas_handle_init*; transformer_engine::typeToSize*; From 2f0dae3af72176efe615add72952a27539410f71 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 7 Apr 2026 00:50:03 +0000 Subject: [PATCH 5/6] 
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- transformer_engine/common/util/cuda_runtime.cpp | 11 +++++++---- transformer_engine/common/util/rtc.cpp | 3 ++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/transformer_engine/common/util/cuda_runtime.cpp b/transformer_engine/common/util/cuda_runtime.cpp index 8c751408c1..504d761bb1 100644 --- a/transformer_engine/common/util/cuda_runtime.cpp +++ b/transformer_engine/common/util/cuda_runtime.cpp @@ -208,8 +208,9 @@ int include_directory_version(bool required) { const auto &include_dir = cuda::include_directory(false); if (include_dir.empty()) { if (required) { - NVTE_ERROR("Could not detect version of CUDA Toolkit headers " - "(CUDA Toolkit headers not found)."); + NVTE_ERROR( + "Could not detect version of CUDA Toolkit headers " + "(CUDA Toolkit headers not found)."); } return -1; } @@ -237,8 +238,10 @@ int include_directory_version(bool required) { } if (required) { - NVTE_ERROR("Could not detect version of CUDA Toolkit headers " - "(Could not parse CUDART_VERSION from ", header_path.string(), ")."); + NVTE_ERROR( + "Could not detect version of CUDA Toolkit headers " + "(Could not parse CUDART_VERSION from ", + header_path.string(), ")."); } return -1; } diff --git a/transformer_engine/common/util/rtc.cpp b/transformer_engine/common/util/rtc.cpp index 31a8cd99ce..2559af613a 100644 --- a/transformer_engine/common/util/rtc.cpp +++ b/transformer_engine/common/util/rtc.cpp @@ -200,7 +200,8 @@ void KernelManager::compile(const std::string& kernel_label, const std::string& if (nvrtc_version != header_version) { log += concat_strings( "\nWarning: CUDA versions do not match between NVRTC and CUDA headers (", - cuda::include_directory(), "). " + cuda::include_directory(), + "). 
" "Consider changing the CUDA header search path (by setting NVTE_CUDA_INCLUDE_DIR) " "or the linked CUDA Runtime (by setting CUDA_HOME or LD_LIBRARY_PATH).\n\n"); } From 2b90938f9ec8bcc04f0b00bdc5f6da11bbcba5a7 Mon Sep 17 00:00:00 2001 From: Tim Moon <4406448+timmoon10@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:51:36 -0700 Subject: [PATCH 6/6] Tweak version message Suggestion from @ptrendx Signed-off-by: Tim Moon <4406448+timmoon10@users.noreply.github.com> --- transformer_engine/common/util/rtc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transformer_engine/common/util/rtc.cpp b/transformer_engine/common/util/rtc.cpp index 2559af613a..70024a202c 100644 --- a/transformer_engine/common/util/rtc.cpp +++ b/transformer_engine/common/util/rtc.cpp @@ -181,7 +181,7 @@ void KernelManager::compile(const std::string& kernel_label, const std::string& // Decode CUDA version number to "major.minor" string auto version_string = [](int v) -> std::string { if (v < 0) { - return ""; + return ""; } return concat_strings(v / 1000, ".", (v % 1000) / 10); };