CUDA: Add support for CUDA_ARCHITECTURES=native

CUDA 11.6 added the `nvcc -arch=native` flag to automatically compile
for the host GPUs' architectures.  Add support for specifying this
special `native` value in `CMAKE_CUDA_ARCHITECTURES` and
`CUDA_ARCHITECTURES`.  During the compiler ABI detection step,
detect the native architectures so we can pass them explicitly
when using Clang or older versions of nvcc.

Fixes: #22375
This commit is contained in:
Brad King
2022-03-04 13:51:49 -05:00
parent 632752d62e
commit d1b48bfabd
19 changed files with 177 additions and 7 deletions

View File

@@ -34,6 +34,11 @@ The ``CUDA_ARCHITECTURES`` may be set to one of the following special values:
Compile for all supported major real architectures, and the highest
major virtual architecture.
``native``
.. versionadded:: 3.24
Compile for the architecture(s) of the host's GPU(s).
Examples
^^^^^^^^

View File

@@ -0,0 +1,7 @@
cuda-arch-native
----------------
* The :variable:`CMAKE_CUDA_ARCHITECTURES` variable and associated
:prop_tgt:`CUDA_ARCHITECTURES` target property now support the
special ``native`` value to compile for the architecture(s)
of the host's GPU(s).

View File

@@ -55,6 +55,7 @@ set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@")
set(CMAKE_CUDA_ARCHITECTURES_ALL "@CMAKE_CUDA_ARCHITECTURES_ALL@")
set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR "@CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR@")
set(CMAKE_CUDA_ARCHITECTURES_NATIVE "@CMAKE_CUDA_ARCHITECTURES_NATIVE@")
set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")

View File

@@ -2,6 +2,10 @@
# error "A C or C++ compiler has been selected for CUDA"
#endif
#include <cstdio>
#include <cuda_runtime.h>
#include "CMakeCompilerABI.h"
int main(int argc, char* argv[])
@@ -13,6 +17,31 @@ int main(int argc, char* argv[])
#if defined(ABI_ID)
require += info_abi[argc];
#endif
(void)argv;
return require;
static_cast<void>(argv);
int count = 0;
if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0) {
std::fprintf(stderr, "No CUDA devices found.\n");
return -1;
}
int found = 0;
const char* sep = "";
for (int device = 0; device < count; ++device) {
cudaDeviceProp prop;
if (cudaGetDeviceProperties(&prop, device) == cudaSuccess) {
std::printf("%s%d%d", sep, prop.major, prop.minor);
sep = ";";
found = 1;
}
}
if (!found) {
std::fprintf(stderr, "No CUDA architecture detected from any devices.\n");
// Convince the compiler that the non-zero return value depends
// on the info strings so they are not optimized out.
return require ? -1 : 1;
}
return 0;
}

View File

@@ -249,7 +249,7 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "${CMAKE_MATCH_1}")
endif()
# Make the all and all-major architecture information available.
# Make the all, all-major, and native architecture information available.
# FIXME(#23161): Defer architecture detection until compiler testing.
include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
endif()
@@ -291,6 +291,17 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR})
endif()
endif()
elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
# For sufficiently new NVCC we can just use the 'native' value directly.
# For VS we don't test since we can't find nvcc this early (see #23161).
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.6)
string(APPEND nvcc_test_flags " -arch=${CMAKE_CUDA_ARCHITECTURES}")
set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
else()
set(architectures_test ${_CUDA_ARCHITECTURES_NATIVE})
endif()
elseif(CMAKE_CUDA_ARCHITECTURES OR "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
# Explicit architectures. Test them during detection.
set(architectures_explicit TRUE)
@@ -636,7 +647,7 @@ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
endif()
endif()
elseif(CMAKE_CUDA_ARCHITECTURES AND NOT "${architectures_tested}" MATCHES "^(all|all-major)$")
elseif(CMAKE_CUDA_ARCHITECTURES AND NOT "${architectures_tested}" MATCHES "^(all|all-major|native)$")
# Sort since order mustn't matter.
list(SORT architectures_detected)
list(SORT architectures_tested)

View File

@@ -26,6 +26,14 @@ function(CMAKE_DETERMINE_COMPILER_ABI lang src)
if(DEFINED CMAKE_${lang}_VERBOSE_COMPILE_FLAG)
set(COMPILE_DEFINITIONS "${CMAKE_${lang}_VERBOSE_COMPILE_FLAG}")
endif()
# CUDA-specific adjustments applied before the ABI detection source is built.
if(lang STREQUAL "CUDA")
if(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
# We are about to detect the native architectures, so we do
# not yet know them. Use all architectures during detection.
set(CMAKE_CUDA_ARCHITECTURES "all")
endif()
# NOTE(review): presumably the static CUDA runtime is requested because the
# CUDA ABI detection source calls the CUDA runtime API to enumerate devices
# -- confirm against the try_compile call that consumes this variable.
set(CMAKE_CUDA_RUNTIME_LIBRARY "Static")
endif()
if(NOT "x${CMAKE_${lang}_COMPILER_ID}" STREQUAL "xMSVC")
# Avoid adding our own platform standard libraries for compilers
# from which we might detect implicit link libraries.

View File

@@ -495,7 +495,7 @@ Id flags: ${testflags} ${CMAKE_${lang}_COMPILER_ID_FLAGS_ALWAYS}
if(CMAKE_VS_PLATFORM_NAME STREQUAL x64)
set(cuda_target "<TargetMachinePlatform>64</TargetMachinePlatform>")
endif()
if(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major)$")
if(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major|native)$")
foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
string(REGEX MATCH "[0-9]+" arch_name "${arch}")
string(APPEND cuda_codegen "compute_${arch_name},sm_${arch_name};")

View File

@@ -21,6 +21,31 @@ if(CMAKE_CUDA_ABI_COMPILED)
# The compiler worked so skip dedicated test below.
set(CMAKE_CUDA_COMPILER_WORKS TRUE)
message(STATUS "Check for working CUDA compiler: ${CMAKE_CUDA_COMPILER} - skipped")
# Run the test binary to detect the native architectures.
# Both stdout and stderr are captured into the same variable so that a
# failure can be logged with everything the binary printed.
execute_process(COMMAND "${CMAKE_PLATFORM_INFO_DIR}/CMakeDetermineCompilerABI_CUDA.bin"
  RESULT_VARIABLE _CUDA_ARCHS_RESULT
  OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
  ERROR_VARIABLE _CUDA_ARCHS_OUTPUT
  OUTPUT_STRIP_TRAILING_WHITESPACE
  )
if(_CUDA_ARCHS_RESULT EQUAL 0)
  # The output is treated as a list of architectures; drop repeated entries.
  set(CMAKE_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}")
  list(REMOVE_DUPLICATES CMAKE_CUDA_ARCHITECTURES_NATIVE)
else()
  # A result that is not an exit code is a string describing why the
  # process could not run or how it terminated.  Record it in the log.
  if(NOT _CUDA_ARCHS_RESULT MATCHES "[0-9]+")
    set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})")
  else()
    set(_CUDA_ARCHS_STATUS "")
  endif()
  string(REPLACE "\n" "\n " _CUDA_ARCHS_OUTPUT " ${_CUDA_ARCHS_OUTPUT}")
  file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
    "Detecting the CUDA native architecture(s) failed${_CUDA_ARCHS_STATUS} with "
    "the following output:\n${_CUDA_ARCHS_OUTPUT}\n\n")
endif()
# Do not leak the temporaries into the including scope.
unset(_CUDA_ARCHS_EXE)
unset(_CUDA_ARCHS_RESULT)
unset(_CUDA_ARCHS_OUTPUT)
unset(_CUDA_ARCHS_STATUS)
endif()
# This file is used by EnableLanguage in cmGlobalGenerator to

View File

@@ -44,3 +44,43 @@ if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4
AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang"))
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87)
endif()
# FIXME(#23161): Detect architectures early since we test them during
# compiler detection.  We already have code to detect them later during
# compiler testing, so we should not need to do this here.
#
# Build the CUDA ABI detection source with nvcc and run the result.  On
# success its output is stored as a list in _CUDA_ARCHITECTURES_NATIVE;
# on failure the output is appended to CMakeError.log.
if(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
  set(_CUDA_ARCHS_EXE "${CMAKE_PLATFORM_INFO_DIR}/CMakeDetermineCUDACompilerArchs.bin")
  # Step 1: compile the detection program.
  execute_process(
    COMMAND "${_CUDA_NVCC_EXECUTABLE}" -o "${_CUDA_ARCHS_EXE}" --cudart=static "${CMAKE_ROOT}/Modules/CMakeCUDACompilerABI.cu"
    RESULT_VARIABLE _CUDA_ARCHS_RESULT
    OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
    ERROR_VARIABLE _CUDA_ARCHS_OUTPUT
    )
  # Step 2: run it only if the compile succeeded.  The result and output
  # variables are reused so the failure handling below covers both steps.
  if(_CUDA_ARCHS_RESULT EQUAL 0)
    execute_process(
      COMMAND "${_CUDA_ARCHS_EXE}"
      RESULT_VARIABLE _CUDA_ARCHS_RESULT
      OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
      ERROR_VARIABLE _CUDA_ARCHS_OUTPUT
      OUTPUT_STRIP_TRAILING_WHITESPACE
      )
  endif()
  if(_CUDA_ARCHS_RESULT EQUAL 0)
    # The output is treated as a list of architectures; drop repeated entries.
    set(_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}")
    list(REMOVE_DUPLICATES _CUDA_ARCHITECTURES_NATIVE)
  else()
    # A result that is not an exit code is a string describing why the
    # process could not run or how it terminated.  Record it in the log.
    if(NOT _CUDA_ARCHS_RESULT MATCHES "[0-9]+")
      set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})")
    else()
      set(_CUDA_ARCHS_STATUS "")
    endif()
    string(REPLACE "\n" "\n " _CUDA_ARCHS_OUTPUT " ${_CUDA_ARCHS_OUTPUT}")
    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
      "Detecting the CUDA native architecture(s) failed${_CUDA_ARCHS_STATUS} with "
      "the following output:\n${_CUDA_ARCHS_OUTPUT}\n\n")
  endif()
  # Do not leak the temporaries into the including scope.
  unset(_CUDA_ARCHS_EXE)
  unset(_CUDA_ARCHS_RESULT)
  unset(_CUDA_ARCHS_OUTPUT)
  unset(_CUDA_ARCHS_STATUS)
endif()

View File

@@ -3467,6 +3467,23 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
property =
*this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR");
}
} else if (property == "native") {
cmValue native =
this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_NATIVE");
if (native.IsEmpty()) {
this->Makefile->IssueMessage(
MessageType::FATAL_ERROR,
"CUDA_ARCHITECTURES is set to \"native\", but no GPU was detected.");
}
if (compiler == "NVIDIA" &&
cmSystemTools::VersionCompare(
cmSystemTools::OP_GREATER_EQUAL,
this->Makefile->GetDefinition("CMAKE_CUDA_COMPILER_VERSION"),
"11.6")) {
flags = cmStrCat(flags, " -arch=", property);
return;
}
property = *native;
}
struct CudaArchitecture

View File

@@ -182,7 +182,8 @@ void cmVisualStudioGeneratorOptions::FixCudaCodeGeneration()
// First entries for the -arch=<arch> [-code=<code>,...] pair.
if (!arch.empty()) {
std::string arch_name = arch[0];
if (arch_name == "all" || arch_name == "all-major") {
if (arch_name == "all" || arch_name == "all-major" ||
arch_name == "native") {
AppendFlagString("AdditionalOptions", "-arch=" + arch_name);
return;
}

View File

@@ -25,6 +25,7 @@ function(verify_output flag)
endforeach()
list(SORT command_archs)
list(REMOVE_DUPLICATES command_archs)
if(NOT "${command_archs}" STREQUAL "${architectures}")
message(FATAL_ERROR "Architectures used for \"${flag}\" don't match the reference (\"${command_archs}\" != \"${architectures}\").")
endif()
@@ -50,7 +51,17 @@ try_compile(all_major_archs_compiles
)
verify_output(all-major)
if(all_archs_compiles AND all_major_archs_compiles)
# Repeat the check with the special "native" value: try to compile main.cu
# with it, capture the build output, and let verify_output() compare the
# architectures found in that output against the reference list.
set(CMAKE_CUDA_ARCHITECTURES native)
try_compile(native_archs_compiles
${CMAKE_CURRENT_BINARY_DIR}/try_compile/native_archs_compiles
${CMAKE_CURRENT_SOURCE_DIR}/main.cu
COMPILE_DEFINITIONS ${try_compile_flags}
OUTPUT_VARIABLE output
)
verify_output(native)
if(all_archs_compiles AND all_major_archs_compiles AND native_archs_compiles)
set(CMAKE_CUDA_ARCHITECTURES all)
add_executable(CudaOnlyArchSpecial main.cu)
target_compile_options(CudaOnlyArchSpecial PRIVATE ${compile_options})
endif()

View File

@@ -2,6 +2,7 @@ include(RunCMake)
run_cmake(architectures-all)
run_cmake(architectures-all-major)
run_cmake(architectures-native)
run_cmake(architectures-empty)
run_cmake(architectures-invalid)

View File

@@ -1,3 +1,4 @@
-- CMAKE_CUDA_ARCHITECTURES='all-major'
-- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'

View File

@@ -3,3 +3,4 @@ enable_language(CUDA)
message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'")
message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'")
message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'")
message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'")

View File

@@ -1,3 +1,4 @@
-- CMAKE_CUDA_ARCHITECTURES='all'
-- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'

View File

@@ -3,3 +3,4 @@ enable_language(CUDA)
message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'")
message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'")
message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'")
message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'")

View File

@@ -0,0 +1,4 @@
-- CMAKE_CUDA_ARCHITECTURES='native'
-- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+'
-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+'
-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+'

View File

@@ -0,0 +1,6 @@
# Enable CUDA with the special "native" architecture value, then report each
# architecture variable as a status line of the form NAME='value' so the
# expected-output file can match them.
set(CMAKE_CUDA_ARCHITECTURES "native")
enable_language(CUDA)
foreach(_arch_var IN ITEMS
    CMAKE_CUDA_ARCHITECTURES
    CMAKE_CUDA_ARCHITECTURES_ALL
    CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR
    CMAKE_CUDA_ARCHITECTURES_NATIVE)
  message(STATUS "${_arch_var}='${${_arch_var}}'")
endforeach()