mirror of
https://github.com/Kitware/CMake.git
synced 2026-06-25 17:28:53 +00:00
CUDA: Add support for CUDA_ARCHITECTURES=native
CUDA 11.6 added the `nvcc -arch=native` flag to automatically compile for the host GPUs' architectures. Add support for specifying this special `native` value in `CMAKE_CUDA_ARCHITECTURES` and `CUDA_ARCHITECTURES`. During the compiler ABI detection step, detect the native architectures so we can pass them explicitly when using Clang or older versions of nvcc. Fixes: #22375
This commit is contained in:
@@ -34,6 +34,11 @@ The ``CUDA_ARCHITECTURES`` may be set to one of the following special values:
|
||||
Compile for all supported major real architectures, and the highest
|
||||
major virtual architecture.
|
||||
|
||||
``native``
|
||||
.. versionadded:: 3.24
|
||||
|
||||
Compile for the architecture(s) of the host's GPU(s).
|
||||
|
||||
Examples
|
||||
^^^^^^^^
|
||||
|
||||
|
||||
7
Help/release/dev/cuda-arch-native.rst
Normal file
7
Help/release/dev/cuda-arch-native.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
cuda-arch-native
|
||||
----------------
|
||||
|
||||
* The :variable:`CMAKE_CUDA_ARCHITECTURES` variable and associated
|
||||
:prop_tgt:`CUDA_ARCHITECTURES` target property now support the
|
||||
special ``native`` value to compile for the architecture(s)
|
||||
of the host's GPU(s).
|
||||
@@ -55,6 +55,7 @@ set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@")
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES_ALL "@CMAKE_CUDA_ARCHITECTURES_ALL@")
|
||||
set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR "@CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR@")
|
||||
set(CMAKE_CUDA_ARCHITECTURES_NATIVE "@CMAKE_CUDA_ARCHITECTURES_NATIVE@")
|
||||
|
||||
set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")
|
||||
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
# error "A C or C++ compiler has been selected for CUDA"
|
||||
#endif
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "CMakeCompilerABI.h"
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
@@ -13,6 +17,31 @@ int main(int argc, char* argv[])
|
||||
#if defined(ABI_ID)
|
||||
require += info_abi[argc];
|
||||
#endif
|
||||
(void)argv;
|
||||
return require;
|
||||
static_cast<void>(argv);
|
||||
|
||||
int count = 0;
|
||||
if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0) {
|
||||
std::fprintf(stderr, "No CUDA devices found.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int found = 0;
|
||||
const char* sep = "";
|
||||
for (int device = 0; device < count; ++device) {
|
||||
cudaDeviceProp prop;
|
||||
if (cudaGetDeviceProperties(&prop, device) == cudaSuccess) {
|
||||
std::printf("%s%d%d", sep, prop.major, prop.minor);
|
||||
sep = ";";
|
||||
found = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
std::fprintf(stderr, "No CUDA architecture detected from any devices.\n");
|
||||
// Convince the compiler that the non-zero return value depends
|
||||
// on the info strings so they are not optimized out.
|
||||
return require ? -1 : 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -249,7 +249,7 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
|
||||
set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "${CMAKE_MATCH_1}")
|
||||
endif()
|
||||
|
||||
# Make the all and all-major architecture information available.
|
||||
# Make the all, all-major, and native architecture information available.
|
||||
# FIXME(#23161): Defer architecture detection until compiler testing.
|
||||
include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
|
||||
endif()
|
||||
@@ -291,6 +291,17 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
|
||||
set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR})
|
||||
endif()
|
||||
endif()
|
||||
elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
|
||||
# For sufficiently new NVCC we can just use the 'native' value directly.
|
||||
# For VS we don't test since we can't find nvcc this early (see #23161).
|
||||
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.6)
|
||||
string(APPEND nvcc_test_flags " -arch=${CMAKE_CUDA_ARCHITECTURES}")
|
||||
set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
|
||||
elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
|
||||
set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
|
||||
else()
|
||||
set(architectures_test ${_CUDA_ARCHITECTURES_NATIVE})
|
||||
endif()
|
||||
elseif(CMAKE_CUDA_ARCHITECTURES OR "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
|
||||
# Explicit architectures. Test them during detection.
|
||||
set(architectures_explicit TRUE)
|
||||
@@ -636,7 +647,7 @@ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
|
||||
message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
|
||||
endif()
|
||||
endif()
|
||||
elseif(CMAKE_CUDA_ARCHITECTURES AND NOT "${architectures_tested}" MATCHES "^(all|all-major)$")
|
||||
elseif(CMAKE_CUDA_ARCHITECTURES AND NOT "${architectures_tested}" MATCHES "^(all|all-major|native)$")
|
||||
# Sort since order mustn't matter.
|
||||
list(SORT architectures_detected)
|
||||
list(SORT architectures_tested)
|
||||
|
||||
@@ -26,6 +26,14 @@ function(CMAKE_DETERMINE_COMPILER_ABI lang src)
|
||||
if(DEFINED CMAKE_${lang}_VERBOSE_COMPILE_FLAG)
|
||||
set(COMPILE_DEFINITIONS "${CMAKE_${lang}_VERBOSE_COMPILE_FLAG}")
|
||||
endif()
|
||||
if(lang STREQUAL "CUDA")
|
||||
if(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
|
||||
# We are about to detect the native architectures, so we do
|
||||
# not yet know them. Use all architectures during detection.
|
||||
set(CMAKE_CUDA_ARCHITECTURES "all")
|
||||
endif()
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY "Static")
|
||||
endif()
|
||||
if(NOT "x${CMAKE_${lang}_COMPILER_ID}" STREQUAL "xMSVC")
|
||||
# Avoid adding our own platform standard libraries for compilers
|
||||
# from which we might detect implicit link libraries.
|
||||
|
||||
@@ -495,7 +495,7 @@ Id flags: ${testflags} ${CMAKE_${lang}_COMPILER_ID_FLAGS_ALWAYS}
|
||||
if(CMAKE_VS_PLATFORM_NAME STREQUAL x64)
|
||||
set(cuda_target "<TargetMachinePlatform>64</TargetMachinePlatform>")
|
||||
endif()
|
||||
if(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major)$")
|
||||
if(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major|native)$")
|
||||
foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
|
||||
string(REGEX MATCH "[0-9]+" arch_name "${arch}")
|
||||
string(APPEND cuda_codegen "compute_${arch_name},sm_${arch_name};")
|
||||
|
||||
@@ -21,6 +21,31 @@ if(CMAKE_CUDA_ABI_COMPILED)
|
||||
# The compiler worked so skip dedicated test below.
|
||||
set(CMAKE_CUDA_COMPILER_WORKS TRUE)
|
||||
message(STATUS "Check for working CUDA compiler: ${CMAKE_CUDA_COMPILER} - skipped")
|
||||
|
||||
# Run the ABI test binary to detect the native architectures.
#
# On success (exit code 0) the program's output is stored as a list in
# CMAKE_CUDA_ARCHITECTURES_NATIVE with duplicate entries removed.
# On failure the variable is left untouched and the captured output is
# appended to CMakeError.log.
execute_process(COMMAND "${CMAKE_PLATFORM_INFO_DIR}/CMakeDetermineCompilerABI_CUDA.bin"
  RESULT_VARIABLE _CUDA_ARCHS_RESULT
  OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
  ERROR_VARIABLE _CUDA_ARCHS_OUTPUT
  OUTPUT_STRIP_TRAILING_WHITESPACE
  )
if(_CUDA_ARCHS_RESULT EQUAL 0)
  set(CMAKE_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}")
  list(REMOVE_DUPLICATES CMAKE_CUDA_ARCHITECTURES_NATIVE)
else()
  # The result is either an integer exit code or a string describing an
  # error condition.  Only the latter carries extra information worth
  # reporting, so anchor the pattern to avoid treating a message that
  # merely contains a digit as a plain exit code.
  if(NOT _CUDA_ARCHS_RESULT MATCHES "^-?[0-9]+$")
    set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})")
  else()
    set(_CUDA_ARCHS_STATUS "")
  endif()
  # Indent the captured output so it stands out in the log.
  string(REPLACE "\n" "\n " _CUDA_ARCHS_OUTPUT " ${_CUDA_ARCHS_OUTPUT}")
  file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
    "Detecting the CUDA native architecture(s) failed${_CUDA_ARCHS_STATUS} with "
    "the following output:\n${_CUDA_ARCHS_OUTPUT}\n\n")
endif()
# Do not leak the temporaries into the including scope.
unset(_CUDA_ARCHS_EXE)
unset(_CUDA_ARCHS_RESULT)
unset(_CUDA_ARCHS_OUTPUT)
unset(_CUDA_ARCHS_STATUS)
|
||||
endif()
|
||||
|
||||
# This file is used by EnableLanguage in cmGlobalGenerator to
|
||||
|
||||
@@ -44,3 +44,43 @@ if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4
|
||||
AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang"))
|
||||
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87)
|
||||
endif()
|
||||
|
||||
# FIXME(#23161): Detect architectures early since we test them during
|
||||
# compiler detection. We already have code to detect them later during
|
||||
# compiler testing, so we should not need to do this here.
|
||||
# Early detection of the host GPU architecture(s).
#
# Compiles CMakeCUDACompilerABI.cu with nvcc and runs the resulting
# program.  On success its output is stored as a list in
# _CUDA_ARCHITECTURES_NATIVE with duplicate entries removed.  If either
# the compile or the run fails, the variable is not set and the captured
# output is appended to CMakeError.log.  Skipped for Visual Studio
# generators.
if(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
  set(_CUDA_ARCHS_EXE "${CMAKE_PLATFORM_INFO_DIR}/CMakeDetermineCUDACompilerArchs.bin")

  # Step 1: build the detection program.
  execute_process(
    COMMAND "${_CUDA_NVCC_EXECUTABLE}" -o "${_CUDA_ARCHS_EXE}" --cudart=static "${CMAKE_ROOT}/Modules/CMakeCUDACompilerABI.cu"
    RESULT_VARIABLE _CUDA_ARCHS_RESULT
    OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
    ERROR_VARIABLE _CUDA_ARCHS_OUTPUT
    )

  # Step 2: run it, but only if the build succeeded.  The result and
  # output variables are reused so the check below covers both steps.
  if(_CUDA_ARCHS_RESULT EQUAL 0)
    execute_process(
      COMMAND "${_CUDA_ARCHS_EXE}"
      RESULT_VARIABLE _CUDA_ARCHS_RESULT
      OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
      ERROR_VARIABLE _CUDA_ARCHS_OUTPUT
      OUTPUT_STRIP_TRAILING_WHITESPACE
      )
  endif()

  if(_CUDA_ARCHS_RESULT EQUAL 0)
    set(_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}")
    list(REMOVE_DUPLICATES _CUDA_ARCHITECTURES_NATIVE)
  else()
    # The result is either an integer exit code or a string describing
    # an error condition.  Only the latter carries extra information
    # worth reporting, so anchor the pattern to avoid treating a message
    # that merely contains a digit as a plain exit code.
    if(NOT _CUDA_ARCHS_RESULT MATCHES "^-?[0-9]+$")
      set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})")
    else()
      set(_CUDA_ARCHS_STATUS "")
    endif()
    # Indent the captured output so it stands out in the log.
    string(REPLACE "\n" "\n " _CUDA_ARCHS_OUTPUT " ${_CUDA_ARCHS_OUTPUT}")
    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
      "Detecting the CUDA native architecture(s) failed${_CUDA_ARCHS_STATUS} with "
      "the following output:\n${_CUDA_ARCHS_OUTPUT}\n\n")
  endif()

  # Do not leak the temporaries into the including scope.
  unset(_CUDA_ARCHS_EXE)
  unset(_CUDA_ARCHS_RESULT)
  unset(_CUDA_ARCHS_OUTPUT)
  unset(_CUDA_ARCHS_STATUS)
endif()
|
||||
|
||||
@@ -3467,6 +3467,23 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
|
||||
property =
|
||||
*this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR");
|
||||
}
|
||||
} else if (property == "native") {
|
||||
cmValue native =
|
||||
this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_NATIVE");
|
||||
if (native.IsEmpty()) {
|
||||
this->Makefile->IssueMessage(
|
||||
MessageType::FATAL_ERROR,
|
||||
"CUDA_ARCHITECTURES is set to \"native\", but no GPU was detected.");
|
||||
}
|
||||
if (compiler == "NVIDIA" &&
|
||||
cmSystemTools::VersionCompare(
|
||||
cmSystemTools::OP_GREATER_EQUAL,
|
||||
this->Makefile->GetDefinition("CMAKE_CUDA_COMPILER_VERSION"),
|
||||
"11.6")) {
|
||||
flags = cmStrCat(flags, " -arch=", property);
|
||||
return;
|
||||
}
|
||||
property = *native;
|
||||
}
|
||||
|
||||
struct CudaArchitecture
|
||||
|
||||
@@ -182,7 +182,8 @@ void cmVisualStudioGeneratorOptions::FixCudaCodeGeneration()
|
||||
// First entries for the -arch=<arch> [-code=<code>,...] pair.
|
||||
if (!arch.empty()) {
|
||||
std::string arch_name = arch[0];
|
||||
if (arch_name == "all" || arch_name == "all-major") {
|
||||
if (arch_name == "all" || arch_name == "all-major" ||
|
||||
arch_name == "native") {
|
||||
AppendFlagString("AdditionalOptions", "-arch=" + arch_name);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ function(verify_output flag)
|
||||
endforeach()
|
||||
|
||||
list(SORT command_archs)
|
||||
list(REMOVE_DUPLICATES command_archs)
|
||||
if(NOT "${command_archs}" STREQUAL "${architectures}")
|
||||
message(FATAL_ERROR "Architectures used for \"${flag}\" don't match the reference (\"${command_archs}\" != \"${architectures}\").")
|
||||
endif()
|
||||
@@ -50,7 +51,17 @@ try_compile(all_major_archs_compiles
|
||||
)
|
||||
verify_output(all-major)
|
||||
|
||||
if(all_archs_compiles AND all_major_archs_compiles)
|
||||
set(CMAKE_CUDA_ARCHITECTURES native)
|
||||
try_compile(native_archs_compiles
|
||||
${CMAKE_CURRENT_BINARY_DIR}/try_compile/native_archs_compiles
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/main.cu
|
||||
COMPILE_DEFINITIONS ${try_compile_flags}
|
||||
OUTPUT_VARIABLE output
|
||||
)
|
||||
verify_output(native)
|
||||
|
||||
if(all_archs_compiles AND all_major_archs_compiles AND native_archs_compiles)
|
||||
set(CMAKE_CUDA_ARCHITECTURES all)
|
||||
add_executable(CudaOnlyArchSpecial main.cu)
|
||||
target_compile_options(CudaOnlyArchSpecial PRIVATE ${compile_options})
|
||||
endif()
|
||||
|
||||
@@ -2,6 +2,7 @@ include(RunCMake)
|
||||
|
||||
run_cmake(architectures-all)
|
||||
run_cmake(architectures-all-major)
|
||||
run_cmake(architectures-native)
|
||||
run_cmake(architectures-empty)
|
||||
run_cmake(architectures-invalid)
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
-- CMAKE_CUDA_ARCHITECTURES='all-major'
|
||||
-- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
|
||||
-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
|
||||
-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'
|
||||
|
||||
@@ -3,3 +3,4 @@ enable_language(CUDA)
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'")
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'")
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'")
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'")
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
-- CMAKE_CUDA_ARCHITECTURES='all'
|
||||
-- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
|
||||
-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
|
||||
-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'
|
||||
|
||||
@@ -3,3 +3,4 @@ enable_language(CUDA)
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'")
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'")
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'")
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'")
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
-- CMAKE_CUDA_ARCHITECTURES='native'
|
||||
-- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
|
||||
-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
|
||||
-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'
|
||||
@@ -0,0 +1,6 @@
|
||||
# Request the special "native" architecture value before enabling CUDA,
# then print the requested value and the ALL, ALL_MAJOR and NATIVE
# architecture list variables as STATUS messages.
set(CMAKE_CUDA_ARCHITECTURES "native")
|
||||
enable_language(CUDA)
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'")
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'")
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'")
|
||||
message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'")
|
||||
Reference in New Issue
Block a user