diff options
Diffstat (limited to 'Modules/FindCUDA.cmake')
-rw-r--r-- | Modules/FindCUDA.cmake | 1156 |
1 files changed, 745 insertions, 411 deletions
diff --git a/Modules/FindCUDA.cmake b/Modules/FindCUDA.cmake index 2705d3271..a4dca5433 100644 --- a/Modules/FindCUDA.cmake +++ b/Modules/FindCUDA.cmake @@ -1,292 +1,351 @@ -# - Tools for building CUDA C files: libraries and build dependencies. -# This script locates the NVIDIA CUDA C tools. It should work on linux, windows, -# and mac and should be reasonably up to date with CUDA C releases. -# -# This script makes use of the standard find_package arguments of <VERSION>, -# REQUIRED and QUIET. CUDA_FOUND will report if an acceptable version of CUDA -# was found. -# -# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if the prefix -# cannot be determined by the location of nvcc in the system path and REQUIRED -# is specified to find_package(). To use a different installed version of the -# toolkit set the environment variable CUDA_BIN_PATH before running cmake -# (e.g. CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default /usr/local/cuda) -# or set CUDA_TOOLKIT_ROOT_DIR after configuring. If you change the value of -# CUDA_TOOLKIT_ROOT_DIR, various components that depend on the path will be -# relocated. +#.rst: +# FindCUDA +# -------- +# +# Tools for building CUDA C files: libraries and build dependencies. +# +# This script locates the NVIDIA CUDA C tools. It should work on linux, +# windows, and mac and should be reasonably up to date with CUDA C +# releases. +# +# This script makes use of the standard find_package arguments of +# <VERSION>, REQUIRED and QUIET. CUDA_FOUND will report if an +# acceptable version of CUDA was found. +# +# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if +# the prefix cannot be determined by the location of nvcc in the system +# path and REQUIRED is specified to find_package(). To use a different +# installed version of the toolkit set the environment variable +# CUDA_BIN_PATH before running cmake (e.g. +# CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default +# /usr/local/cuda) or set CUDA_TOOLKIT_ROOT_DIR after configuring. If +# you change the value of CUDA_TOOLKIT_ROOT_DIR, various components that +# depend on the path will be relocated. # # It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain -# platforms, or to use a cuda runtime not installed in the default location. In -# newer versions of the toolkit the cuda library is included with the graphics -# driver- be sure that the driver version matches what is needed by the cuda -# runtime version. -# -# The following variables affect the behavior of the macros in the script (in -# alphebetical order). Note that any of these flags can be changed multiple -# times in the same directory before calling CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX or CUDA_WRAP_SRCS. -# -# CUDA_64_BIT_DEVICE_CODE (Default matches host bit size) -# -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code. -# Note that making this different from the host code when generating object -# or C files from CUDA code just won't work, because size_t gets defined by -# nvcc in the generated source. If you compile to PTX and then load the -# file yourself, you can mix bit sizes between device and host. -# -# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON) -# -- Set to ON if you want the custom build rule to be attached to the source -# file in Visual Studio. Turn OFF if you add the same cuda file to multiple -# targets. -# -# This allows the user to build the target from the CUDA file; however, bad -# things can happen if the CUDA source file is added to multiple targets. -# When performing parallel builds it is possible for the custom build -# command to be run more than once and in parallel causing cryptic build -# errors. VS runs the rules for every source file in the target, and a -# source can have only one rule no matter how many projects it is added to. -# When the rule is run from multiple targets race conditions can occur on -# the generated file. Eventually everything will get built, but if the user -# is unaware of this behavior, there may be confusion. It would be nice if -# this script could detect the reuse of source files across multiple targets -# and turn the option off for the user, but no good solution could be found. -# -# CUDA_BUILD_CUBIN (Default OFF) -# -- Set to ON to enable and extra compilation pass with the -cubin option in -# Device mode. The output is parsed and register, shared memory usage is -# printed during build. -# -# CUDA_BUILD_EMULATION (Default OFF for device mode) -# -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files -# when CUDA_BUILD_EMULATION is TRUE. -# -# CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR) -# -- Set to the path you wish to have the generated files placed. If it is -# blank output files will be placed in CMAKE_CURRENT_BINARY_DIR. -# Intermediate files will always be placed in -# CMAKE_CURRENT_BINARY_DIR/CMakeFiles. -# -# CUDA_HOST_COMPILATION_CPP (Default ON) -# -- Set to OFF for C compilation of host code. -# -# CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER, $(VCInstallDir)/bin for VS) -# -- Set the host compiler to be used by nvcc. Ignored if -ccbin or -# --compiler-bindir is already present in the CUDA_NVCC_FLAGS or -# CUDA_NVCC_FLAGS_<CONFIG> variables. For Visual Studio targets -# $(VCInstallDir)/bin is a special value that expands out to the path when -# the command is run from withing VS. -# -# CUDA_NVCC_FLAGS -# CUDA_NVCC_FLAGS_<CONFIG> -# -- Additional NVCC command line arguments. NOTE: multiple arguments must be -# semi-colon delimited (e.g. --compiler-options;-Wall) -# -# CUDA_PROPAGATE_HOST_FLAGS (Default ON) -# -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration -# dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the -# host compiler through nvcc's -Xcompiler flag. This helps make the -# generated host code match the rest of the system better. Sometimes -# certain flags give nvcc problems, and this will help you turn the flag -# propagation off. This does not affect the flags supplied directly to nvcc -# via CUDA_NVCC_FLAGS or through the OPTION flags specified through -# CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS. Flags used for -# shared library compilation are not affected by this flag. -# -# CUDA_SEPARABLE_COMPILATION (Default OFF) -# -- If set this will enable separable compilation for all CUDA runtime object -# files. If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY -# (e.g. calling CUDA_WRAP_SRCS directly), -# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called. -# -# CUDA_VERBOSE_BUILD (Default OFF) -# -- Set to ON to see all the commands used when building the CUDA file. When -# using a Makefile generator the value defaults to VERBOSE (run make -# VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will -# always print the output. -# -# The script creates the following macros (in alphebetical order): -# -# CUDA_ADD_CUFFT_TO_TARGET( cuda_target ) -# -- Adds the cufft library to the target (can be any target). Handles whether -# you are in emulation mode or not. -# -# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target ) -# -- Adds the cublas library to the target (can be any target). Handles -# whether you are in emulation mode or not. -# -# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ... -# [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) -# -- Creates an executable "cuda_target" which is made up of the files -# specified. All of the non CUDA C files are compiled using the standard -# build rules specified by CMAKE and the cuda files are compiled to object -# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is -# added automatically to include_directories(). Some standard CMake target -# calls can be used on the target after calling this macro -# (e.g. set_target_properties and target_link_libraries), but setting -# properties that adjust compilation flags will not affect code compiled by -# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS. -# -# CUDA_ADD_LIBRARY( cuda_target file0 file1 ... -# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) -# -- Same as CUDA_ADD_EXECUTABLE except that a library is created. -# -# CUDA_BUILD_CLEAN_TARGET() -# -- Creates a convience target that deletes all the dependency files -# generated. You should make clean after running this target to ensure the -# dependency files get regenerated. -# -# CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE] -# [OPTIONS ...] ) -# -- Returns a list of generated files from the input source files to be used -# with ADD_LIBRARY or ADD_EXECUTABLE. -# -# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] ) -# -- Returns a list of PTX files generated from the input source files. -# -# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var -# cuda_target -# object_files ) -# -- Compute the name of the intermediate link file used for separable -# compilation. This file name is typically passed into -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS. output_file_var is produced -# based on cuda_target the list of objects files that need separable -# compilation as specified by object_files. If the object_files list is -# empty, then output_file_var will be empty. This function is called -# automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE. Note that -# this is a function and not a macro. -# -# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... ) -# -- Sets the directories that should be passed to nvcc -# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu -# files. -# -# -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target -# nvcc_flags object_files) -# -# -- Generates the link object required by separable compilation from the given -# object files. This is called automatically for CUDA_ADD_EXECUTABLE and -# CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS -# directly. When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the -# nvcc_flags passed in are the same as the flags passed in via the OPTIONS -# argument. The only nvcc flag added automatically is the bitness flag as -# specified by CUDA_64_BIT_DEVICE_CODE. Note that this is a function -# instead of a macro. -# -# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ... -# [STATIC | SHARED | MODULE] [OPTIONS ...] ) -# -- This is where all the magic happens. CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this -# function under the hood. -# -# Given the list of files (file0 file1 ... fileN) this macro generates -# custom commands that generate either PTX or linkable objects (use "PTX" or -# "OBJ" for the format argument to switch). Files that don't end with .cu -# or have the HEADER_FILE_ONLY property are ignored. -# -# The arguments passed in after OPTIONS are extra command line options to -# give to nvcc. You can also specify per configuration options by -# specifying the name of the configuration followed by the options. General -# options must preceed configuration specific options. Not all -# configurations need to be specified, only the ones provided will be used. -# -# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag" -# DEBUG -g -# RELEASE --use_fast_math -# RELWITHDEBINFO --use_fast_math;-g -# MINSIZEREL --use_fast_math -# -# For certain configurations (namely VS generating object files with -# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will -# be produced for the given cuda file. This is because when you add the -# cuda file to Visual Studio it knows that this file produces an object file -# and will link in the resulting object file automatically. -# -# This script will also generate a separate cmake script that is used at -# build time to invoke nvcc. This is for several reasons. -# -# 1. nvcc can return negative numbers as return values which confuses -# Visual Studio into thinking that the command succeeded. The script now -# checks the error codes and produces errors when there was a problem. -# -# 2. nvcc has been known to not delete incomplete results when it -# encounters problems. This confuses build systems into thinking the -# target was generated when in fact an unusable file exists. The script -# now deletes the output files if there was an error. -# -# 3. By putting all the options that affect the build into a file and then -# make the build rule dependent on the file, the output files will be -# regenerated when the options change. -# -# This script also looks at optional arguments STATIC, SHARED, or MODULE to -# determine when to target the object compilation for a shared library. -# BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in -# CUDA_ADD_LIBRARY. On some systems special flags are added for building -# objects intended for shared libraries. A preprocessor macro, -# <target_name>_EXPORTS is defined when a shared library compilation is -# detected. -# -# Flags passed into add_definitions with -D or /D are passed along to nvcc. -# -# The script defines the following variables: -# -# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc. -# CUDA_VERSION_MINOR -- The minor version. -# CUDA_VERSION -# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR -# -# CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set). -# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the -# SDK. This script will not directly support finding -# specific libraries or headers, as that isn't -# supported by NVIDIA. If you want to change -# libraries when the path changes see the -# FindCUDA.cmake script for an example of how to clear -# these variables. There are also examples of how to -# use the CUDA_SDK_ROOT_DIR to locate headers or -# libraries, if you so choose (at your own risk). -# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically -# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY. -# CUDA_LIBRARIES -- Cuda RT library. -# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT -# implementation (alternative to: -# CUDA_ADD_CUFFT_TO_TARGET macro) -# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS -# implementation (alterative to: -# CUDA_ADD_CUBLAS_TO_TARGET macro). -# CUDA_cupti_LIBRARY -- CUDA Profiling Tools Interface library. -# Only available for CUDA version 4.0+. -# CUDA_curand_LIBRARY -- CUDA Random Number Generation library. -# Only available for CUDA version 3.2+. -# CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library. -# Only available for CUDA version 3.2+. -# CUDA_npp_LIBRARY -- NVIDIA Performance Primitives library. -# Only available for CUDA version 4.0+. -# CUDA_nppc_LIBRARY -- NVIDIA Performance Primitives library (core). -# Only available for CUDA version 5.5+. -# CUDA_nppi_LIBRARY -- NVIDIA Performance Primitives library (image processing). -# Only available for CUDA version 5.5+. -# CUDA_npps_LIBRARY -- NVIDIA Performance Primitives library (signal processing). -# Only available for CUDA version 5.5+. -# CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library. -# Only available for CUDA version 3.2+. -# Windows only. -# CUDA_nvcuvid_LIBRARY -- CUDA Video Decoder library. -# Only available for CUDA version 3.2+. -# Windows only. -# -# -# James Bigler, NVIDIA Corp (nvidia.com - jbigler) -# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html -# -# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. -# -# Copyright (c) 2007-2009 -# Scientific Computing and Imaging Institute, University of Utah -# -# This code is licensed under the MIT License. See the FindCUDA.cmake script -# for the text of the license. +# platforms, or to use a cuda runtime not installed in the default +# location. In newer versions of the toolkit the cuda library is +# included with the graphics driver- be sure that the driver version +# matches what is needed by the cuda runtime version. +# +# The following variables affect the behavior of the macros in the +# script (in alphebetical order). Note that any of these flags can be +# changed multiple times in the same directory before calling +# CUDA_ADD_EXECUTABLE, CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX, +# CUDA_COMPILE_FATBIN, CUDA_COMPILE_CUBIN or CUDA_WRAP_SRCS:: +# +# CUDA_64_BIT_DEVICE_CODE (Default matches host bit size) +# -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code. +# Note that making this different from the host code when generating object +# or C files from CUDA code just won't work, because size_t gets defined by +# nvcc in the generated source. If you compile to PTX and then load the +# file yourself, you can mix bit sizes between device and host. +# +# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON) +# -- Set to ON if you want the custom build rule to be attached to the source +# file in Visual Studio. Turn OFF if you add the same cuda file to multiple +# targets. +# +# This allows the user to build the target from the CUDA file; however, bad +# things can happen if the CUDA source file is added to multiple targets. +# When performing parallel builds it is possible for the custom build +# command to be run more than once and in parallel causing cryptic build +# errors. VS runs the rules for every source file in the target, and a +# source can have only one rule no matter how many projects it is added to. +# When the rule is run from multiple targets race conditions can occur on +# the generated file. Eventually everything will get built, but if the user +# is unaware of this behavior, there may be confusion. It would be nice if +# this script could detect the reuse of source files across multiple targets +# and turn the option off for the user, but no good solution could be found. +# +# CUDA_BUILD_CUBIN (Default OFF) +# -- Set to ON to enable and extra compilation pass with the -cubin option in +# Device mode. The output is parsed and register, shared memory usage is +# printed during build. +# +# CUDA_BUILD_EMULATION (Default OFF for device mode) +# -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files +# when CUDA_BUILD_EMULATION is TRUE. +# +# CUDA_LINK_LIBRARIES_KEYWORD (Default "") +# -- The <PRIVATE|PUBLIC|INTERFACE> keyword to use for internal +# target_link_libraries calls. The default is to use no keyword which +# uses the old "plain" form of target_link_libraries. Note that is matters +# because whatever is used inside the FindCUDA module must also be used +# outside - the two forms of target_link_libraries cannot be mixed. +# +# CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR) +# -- Set to the path you wish to have the generated files placed. If it is +# blank output files will be placed in CMAKE_CURRENT_BINARY_DIR. +# Intermediate files will always be placed in +# CMAKE_CURRENT_BINARY_DIR/CMakeFiles. +# +# CUDA_HOST_COMPILATION_CPP (Default ON) +# -- Set to OFF for C compilation of host code. +# +# CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER, $(VCInstallDir)/bin for VS) +# -- Set the host compiler to be used by nvcc. Ignored if -ccbin or +# --compiler-bindir is already present in the CUDA_NVCC_FLAGS or +# CUDA_NVCC_FLAGS_<CONFIG> variables. For Visual Studio targets +# $(VCInstallDir)/bin is a special value that expands out to the path when +# the command is run from within VS. +# +# CUDA_NVCC_FLAGS +# CUDA_NVCC_FLAGS_<CONFIG> +# -- Additional NVCC command line arguments. NOTE: multiple arguments must be +# semi-colon delimited (e.g. --compiler-options;-Wall) +# +# CUDA_PROPAGATE_HOST_FLAGS (Default ON) +# -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration +# dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the +# host compiler through nvcc's -Xcompiler flag. This helps make the +# generated host code match the rest of the system better. Sometimes +# certain flags give nvcc problems, and this will help you turn the flag +# propagation off. This does not affect the flags supplied directly to nvcc +# via CUDA_NVCC_FLAGS or through the OPTION flags specified through +# CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS. Flags used for +# shared library compilation are not affected by this flag. +# +# CUDA_SEPARABLE_COMPILATION (Default OFF) +# -- If set this will enable separable compilation for all CUDA runtime object +# files. If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY +# (e.g. calling CUDA_WRAP_SRCS directly), +# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and +# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called. +# +# CUDA_SOURCE_PROPERTY_FORMAT +# -- If this source file property is set, it can override the format specified +# to CUDA_WRAP_SRCS (OBJ, PTX, CUBIN, or FATBIN). If an input source file +# is not a .cu file, setting this file will cause it to be treated as a .cu +# file. See documentation for set_source_files_properties on how to set +# this property. +# +# CUDA_USE_STATIC_CUDA_RUNTIME (Default ON) +# -- When enabled the static version of the CUDA runtime library will be used +# in CUDA_LIBRARIES. If the version of CUDA configured doesn't support +# this option, then it will be silently disabled. +# +# CUDA_VERBOSE_BUILD (Default OFF) +# -- Set to ON to see all the commands used when building the CUDA file. When +# using a Makefile generator the value defaults to VERBOSE (run make +# VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will +# always print the output. +# +# The script creates the following macros (in alphebetical order):: +# +# CUDA_ADD_CUFFT_TO_TARGET( cuda_target ) +# -- Adds the cufft library to the target (can be any target). Handles whether +# you are in emulation mode or not. +# +# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target ) +# -- Adds the cublas library to the target (can be any target). Handles +# whether you are in emulation mode or not. +# +# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ... +# [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) +# -- Creates an executable "cuda_target" which is made up of the files +# specified. All of the non CUDA C files are compiled using the standard +# build rules specified by CMAKE and the cuda files are compiled to object +# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is +# added automatically to include_directories(). Some standard CMake target +# calls can be used on the target after calling this macro +# (e.g. set_target_properties and target_link_libraries), but setting +# properties that adjust compilation flags will not affect code compiled by +# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE, +# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS. +# +# CUDA_ADD_LIBRARY( cuda_target file0 file1 ... +# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) +# -- Same as CUDA_ADD_EXECUTABLE except that a library is created. +# +# CUDA_BUILD_CLEAN_TARGET() +# -- Creates a convience target that deletes all the dependency files +# generated. You should make clean after running this target to ensure the +# dependency files get regenerated. +# +# CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE] +# [OPTIONS ...] ) +# -- Returns a list of generated files from the input source files to be used +# with ADD_LIBRARY or ADD_EXECUTABLE. +# +# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] ) +# -- Returns a list of PTX files generated from the input source files. +# +# CUDA_COMPILE_FATBIN( generated_files file0 file1 ... [OPTIONS ...] ) +# -- Returns a list of FATBIN files generated from the input source files. +# +# CUDA_COMPILE_CUBIN( generated_files file0 file1 ... [OPTIONS ...] ) +# -- Returns a list of CUBIN files generated from the input source files. +# +# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var +# cuda_target +# object_files ) +# -- Compute the name of the intermediate link file used for separable +# compilation. This file name is typically passed into +# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS. output_file_var is produced +# based on cuda_target the list of objects files that need separable +# compilation as specified by object_files. If the object_files list is +# empty, then output_file_var will be empty. This function is called +# automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE. Note that +# this is a function and not a macro. +# +# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... ) +# -- Sets the directories that should be passed to nvcc +# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu +# files. +# +# +# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target +# nvcc_flags object_files) +# -- Generates the link object required by separable compilation from the given +# object files. This is called automatically for CUDA_ADD_EXECUTABLE and +# CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS +# directly. When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the +# nvcc_flags passed in are the same as the flags passed in via the OPTIONS +# argument. The only nvcc flag added automatically is the bitness flag as +# specified by CUDA_64_BIT_DEVICE_CODE. Note that this is a function +# instead of a macro. +# +# CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [target_CUDA_architectures]) +# -- Selects GPU arch flags for nvcc based on target_CUDA_architectures +# target_CUDA_architectures : Auto | Common | All | LIST(ARCH_AND_PTX ...) +# - "Auto" detects local machine GPU compute arch at runtime. +# - "Common" and "All" cover common and entire subsets of architectures +# ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX +# NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal +# NUM: Any number. Only those pairs are currently accepted by NVCC though: +# 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2 +# Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable} +# Additionally, sets ${out_variable}_readable to the resulting numeric list +# Example: +# CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.0 3.5+PTX 5.2(5.0) Maxwell) +# LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS}) +# +# More info on CUDA architectures: https://en.wikipedia.org/wiki/CUDA +# Note that this is a function instead of a macro. +# +# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ... +# [STATIC | SHARED | MODULE] [OPTIONS ...] ) +# -- This is where all the magic happens. CUDA_ADD_EXECUTABLE, +# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this +# function under the hood. +# +# Given the list of files (file0 file1 ... fileN) this macro generates +# custom commands that generate either PTX or linkable objects (use "PTX" or +# "OBJ" for the format argument to switch). Files that don't end with .cu +# or have the HEADER_FILE_ONLY property are ignored. +# +# The arguments passed in after OPTIONS are extra command line options to +# give to nvcc. You can also specify per configuration options by +# specifying the name of the configuration followed by the options. General +# options must precede configuration specific options. Not all +# configurations need to be specified, only the ones provided will be used. +# +# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag" +# DEBUG -g +# RELEASE --use_fast_math +# RELWITHDEBINFO --use_fast_math;-g +# MINSIZEREL --use_fast_math +# +# For certain configurations (namely VS generating object files with +# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will +# be produced for the given cuda file. This is because when you add the +# cuda file to Visual Studio it knows that this file produces an object file +# and will link in the resulting object file automatically. +# +# This script will also generate a separate cmake script that is used at +# build time to invoke nvcc. This is for several reasons. +# +# 1. nvcc can return negative numbers as return values which confuses +# Visual Studio into thinking that the command succeeded. The script now +# checks the error codes and produces errors when there was a problem. +# +# 2. nvcc has been known to not delete incomplete results when it +# encounters problems. This confuses build systems into thinking the +# target was generated when in fact an unusable file exists. The script +# now deletes the output files if there was an error. +# +# 3. By putting all the options that affect the build into a file and then +# make the build rule dependent on the file, the output files will be +# regenerated when the options change. +# +# This script also looks at optional arguments STATIC, SHARED, or MODULE to +# determine when to target the object compilation for a shared library. +# BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in +# CUDA_ADD_LIBRARY. On some systems special flags are added for building +# objects intended for shared libraries. A preprocessor macro, +# <target_name>_EXPORTS is defined when a shared library compilation is +# detected. +# +# Flags passed into add_definitions with -D or /D are passed along to nvcc. +# +# +# +# The script defines the following variables:: +# +# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc. +# CUDA_VERSION_MINOR -- The minor version. +# CUDA_VERSION +# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR +# CUDA_HAS_FP16 -- Whether a short float (float16,fp16) is supported. +# +# CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set). +# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the +# SDK. This script will not directly support finding +# specific libraries or headers, as that isn't +# supported by NVIDIA. If you want to change +# libraries when the path changes see the +# FindCUDA.cmake script for an example of how to clear +# these variables. There are also examples of how to +# use the CUDA_SDK_ROOT_DIR to locate headers or +# libraries, if you so choose (at your own risk). +# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically +# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY. +# CUDA_LIBRARIES -- Cuda RT library. +# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT +# implementation (alternative to: +# CUDA_ADD_CUFFT_TO_TARGET macro) +# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS +# implementation (alternative to: +# CUDA_ADD_CUBLAS_TO_TARGET macro). +# CUDA_cudart_static_LIBRARY -- Statically linkable cuda runtime library. +# Only available for CUDA version 5.5+ +# CUDA_cudadevrt_LIBRARY -- Device runtime library. +# Required for separable compilation. +# CUDA_cupti_LIBRARY -- CUDA Profiling Tools Interface library. +# Only available for CUDA version 4.0+. +# CUDA_curand_LIBRARY -- CUDA Random Number Generation library. +# Only available for CUDA version 3.2+. +# CUDA_cusolver_LIBRARY -- CUDA Direct Solver library. +# Only available for CUDA version 7.0+. +# CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library. +# Only available for CUDA version 3.2+. +# CUDA_npp_LIBRARY -- NVIDIA Performance Primitives lib. +# Only available for CUDA version 4.0+. +# CUDA_nppc_LIBRARY -- NVIDIA Performance Primitives lib (core). +# Only available for CUDA version 5.5+. +# CUDA_nppi_LIBRARY -- NVIDIA Performance Primitives lib (image processing). +# Only available for CUDA version 5.5+. +# CUDA_npps_LIBRARY -- NVIDIA Performance Primitives lib (signal processing). +# Only available for CUDA version 5.5+. +# CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library. +# Only available for CUDA version 3.2+. +# Windows only. +# CUDA_nvcuvid_LIBRARY -- CUDA Video Decoder library. +# Only available for CUDA version 3.2+. +# Windows only. +# + +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html +# +# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# Copyright (c) 2007-2009 +# Scientific Computing and Imaging Institute, University of Utah +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. # The MIT License # @@ -313,11 +372,6 @@ # FindCUDA.cmake -# We need to have at least this version to support the VERSION_LESS argument to 'if' (2.6.2) and unset (2.6.3) -cmake_policy(PUSH) -cmake_minimum_required(VERSION 2.6.3) -cmake_policy(POP) - # This macro helps us find the location of helper files we will need the full path to macro(CUDA_FIND_HELPER_FILE _name _extension) set(_full_name "${_name}.${_extension}") @@ -440,7 +494,31 @@ set(CUDA_NVCC_FLAGS "" CACHE STRING "Semi-colon delimit multiple arguments.") if(CMAKE_GENERATOR MATCHES "Visual Studio") set(CUDA_HOST_COMPILER "$(VCInstallDir)bin" CACHE FILEPATH "Host side compiler used by NVCC") else() - set(CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}" CACHE FILEPATH "Host side compiler used by NVCC") + if(APPLE + AND "${CMAKE_C_COMPILER_ID}" MATCHES "Clang" + AND "${CMAKE_C_COMPILER}" MATCHES "/cc$") + # Using cc which is symlink to clang may let NVCC think it is GCC and issue + # unhandled -dumpspecs option to clang. Also in case neither + # CMAKE_C_COMPILER is defined (project does not use C language) nor + # CUDA_HOST_COMPILER is specified manually we should skip -ccbin and let + # nvcc use its own default C compiler. + # Only care about this on APPLE with clang to avoid + # following symlinks to things like ccache + if(DEFINED CMAKE_C_COMPILER AND NOT DEFINED CUDA_HOST_COMPILER) + get_filename_component(c_compiler_realpath "${CMAKE_C_COMPILER}" REALPATH) + # if the real path does not end up being clang then + # go back to using CMAKE_C_COMPILER + if(NOT "${c_compiler_realpath}" MATCHES "/clang$") + set(c_compiler_realpath "${CMAKE_C_COMPILER}") + endif() + else() + set(c_compiler_realpath "") + endif() + set(CUDA_HOST_COMPILER "${c_compiler_realpath}" CACHE FILEPATH "Host side compiler used by NVCC") + else() + set(CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}" + CACHE FILEPATH "Host side compiler used by NVCC") + endif() endif() # Propagate the host flags to the host compiler via -Xcompiler @@ -459,6 +537,10 @@ mark_as_advanced( CUDA_HOST_COMPILATION_CPP CUDA_NVCC_FLAGS CUDA_PROPAGATE_HOST_FLAGS + CUDA_BUILD_CUBIN + CUDA_BUILD_EMULATION + CUDA_VERBOSE_BUILD + CUDA_SEPARABLE_COMPILATION ) # Makefile and similar generators don't define CMAKE_CONFIGURATION_TYPES, so we @@ -481,25 +563,25 @@ endforeach() ############################################################################### ############################################################################### -# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed, -# if they have then clear the cache variables, so that will be detected again. -if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}") - unset(CUDA_NVCC_EXECUTABLE CACHE) +macro(cuda_unset_include_and_libraries) unset(CUDA_TOOLKIT_INCLUDE CACHE) unset(CUDA_CUDART_LIBRARY CACHE) + unset(CUDA_CUDA_LIBRARY CACHE) # Make sure you run this before you unset CUDA_VERSION. if(CUDA_VERSION VERSION_EQUAL "3.0") # This only existed in the 3.0 version of the CUDA toolkit unset(CUDA_CUDARTEMU_LIBRARY CACHE) endif() - unset(CUDA_VERSION CACHE) - unset(CUDA_CUDA_LIBRARY CACHE) - unset(CUDA_cupti_LIBRARY CACHE) + unset(CUDA_cudart_static_LIBRARY CACHE) + unset(CUDA_cudadevrt_LIBRARY CACHE) unset(CUDA_cublas_LIBRARY CACHE) + unset(CUDA_cublas_device_LIBRARY CACHE) unset(CUDA_cublasemu_LIBRARY CACHE) unset(CUDA_cufft_LIBRARY CACHE) unset(CUDA_cufftemu_LIBRARY CACHE) + unset(CUDA_cupti_LIBRARY CACHE) unset(CUDA_curand_LIBRARY CACHE) + unset(CUDA_cusolver_LIBRARY CACHE) unset(CUDA_cusparse_LIBRARY CACHE) unset(CUDA_npp_LIBRARY CACHE) unset(CUDA_nppc_LIBRARY CACHE) @@ -507,33 +589,50 @@ if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}") unset(CUDA_npps_LIBRARY CACHE) unset(CUDA_nvcuvenc_LIBRARY CACHE) unset(CUDA_nvcuvid_LIBRARY CACHE) -endif() + unset(CUDA_USE_STATIC_CUDA_RUNTIME CACHE) + unset(CUDA_GPU_DETECT_OUTPUT CACHE) +endmacro() -if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}") - # No specific variables to catch. Use this kind of code before calling - # find_package(CUDA) to clean up any variables that may depend on this path. +# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed, +# if they have then clear the cache variables, so that will be detected again. +if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}") + unset(CUDA_TOOLKIT_TARGET_DIR CACHE) + unset(CUDA_NVCC_EXECUTABLE CACHE) + cuda_unset_include_and_libraries() + unset(CUDA_VERSION CACHE) +endif() - # unset(MY_SPECIAL_CUDA_SDK_INCLUDE_DIR CACHE) - # unset(MY_SPECIAL_CUDA_SDK_LIBRARY CACHE) +if(NOT "${CUDA_TOOLKIT_TARGET_DIR}" STREQUAL "${CUDA_TOOLKIT_TARGET_DIR_INTERNAL}") + cuda_unset_include_and_libraries() endif() -# Search for the cuda distribution. -if(NOT CUDA_TOOLKIT_ROOT_DIR) +# +# End of unset() +# + +# +# Start looking for things +# +# Search for the cuda distribution. +if(NOT CUDA_TOOLKIT_ROOT_DIR AND NOT CMAKE_CROSSCOMPILING) # Search in the CUDA_BIN_PATH first. find_path(CUDA_TOOLKIT_ROOT_DIR NAMES nvcc nvcc.exe PATHS + ENV CUDA_TOOLKIT_ROOT ENV CUDA_PATH ENV CUDA_BIN_PATH PATH_SUFFIXES bin bin64 DOC "Toolkit location." NO_DEFAULT_PATH ) + # Now search default paths find_path(CUDA_TOOLKIT_ROOT_DIR NAMES nvcc nvcc.exe - PATHS /usr/local/bin + PATHS /opt/cuda/bin + /usr/local/bin /usr/local/cuda/bin DOC "Toolkit location." ) @@ -542,7 +641,9 @@ if(NOT CUDA_TOOLKIT_ROOT_DIR) string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR}) # We need to force this back into the cache. set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR} CACHE PATH "Toolkit location." FORCE) + set(CUDA_TOOLKIT_TARGET_DIR ${CUDA_TOOLKIT_ROOT_DIR}) endif() + if (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR}) if(CUDA_FIND_REQUIRED) message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR") @@ -552,8 +653,45 @@ if(NOT CUDA_TOOLKIT_ROOT_DIR) endif () endif () +if(CMAKE_CROSSCOMPILING) + SET (CUDA_TOOLKIT_ROOT $ENV{CUDA_TOOLKIT_ROOT}) + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a") + # Support for NVPACK + set (CUDA_TOOLKIT_TARGET_NAME "armv7-linux-androideabi") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + # Support for arm cross compilation + set(CUDA_TOOLKIT_TARGET_NAME "armv7-linux-gnueabihf") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + # Support for aarch64 cross compilation + if (ANDROID_ARCH_NAME STREQUAL "arm64") + set(CUDA_TOOLKIT_TARGET_NAME "aarch64-linux-androideabi") + else() + set(CUDA_TOOLKIT_TARGET_NAME "aarch64-linux") + endif (ANDROID_ARCH_NAME STREQUAL "arm64") + endif() + + if (EXISTS "${CUDA_TOOLKIT_ROOT}/targets/${CUDA_TOOLKIT_TARGET_NAME}") + set(CUDA_TOOLKIT_TARGET_DIR "${CUDA_TOOLKIT_ROOT}/targets/${CUDA_TOOLKIT_TARGET_NAME}" CACHE PATH "CUDA Toolkit target location.") + SET (CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT}) + mark_as_advanced(CUDA_TOOLKIT_TARGET_DIR) + endif() + + # add known CUDA targetr root path to the set of directories we search for programs, libraries and headers + set( CMAKE_FIND_ROOT_PATH "${CUDA_TOOLKIT_TARGET_DIR};${CMAKE_FIND_ROOT_PATH}") + macro( cuda_find_host_program ) + find_host_program( ${ARGN} ) + endmacro() +else() + # for non-cross-compile, find_host_program == find_program and CUDA_TOOLKIT_TARGET_DIR == CUDA_TOOLKIT_ROOT_DIR + macro( cuda_find_host_program ) + find_program( ${ARGN} ) + endmacro() + SET (CUDA_TOOLKIT_TARGET_DIR ${CUDA_TOOLKIT_ROOT_DIR}) +endif() + + # CUDA_NVCC_EXECUTABLE -find_program(CUDA_NVCC_EXECUTABLE +cuda_find_host_program(CUDA_NVCC_EXECUTABLE NAMES nvcc PATHS "${CUDA_TOOLKIT_ROOT_DIR}" ENV CUDA_PATH @@ -562,7 +700,7 @@ find_program(CUDA_NVCC_EXECUTABLE NO_DEFAULT_PATH ) # Search default search paths, after we search our own set of paths. -find_program(CUDA_NVCC_EXECUTABLE nvcc) +cuda_find_host_program(CUDA_NVCC_EXECUTABLE nvcc) mark_as_advanced(CUDA_NVCC_EXECUTABLE) if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION) @@ -578,13 +716,14 @@ else() string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${CUDA_VERSION}") endif() + # Always set this convenience variable set(CUDA_VERSION_STRING "${CUDA_VERSION}") # CUDA_TOOLKIT_INCLUDE find_path(CUDA_TOOLKIT_INCLUDE device_functions.h # Header included in toolkit - PATHS "${CUDA_TOOLKIT_ROOT_DIR}" + PATHS ${CUDA_TOOLKIT_TARGET_DIR} ENV CUDA_PATH ENV CUDA_INC_PATH PATH_SUFFIXES include @@ -594,8 +733,14 @@ find_path(CUDA_TOOLKIT_INCLUDE find_path(CUDA_TOOLKIT_INCLUDE device_functions.h) mark_as_advanced(CUDA_TOOLKIT_INCLUDE) +if (CUDA_VERSION VERSION_GREATER "7.0" OR EXISTS "${CUDA_TOOLKIT_INCLUDE}/cuda_fp16.h") + set(CUDA_HAS_FP16 TRUE) +else() + set(CUDA_HAS_FP16 FALSE) +endif() + # Set the user list of include dir to nothing to initialize it. -set (CUDA_NVCC_INCLUDE_ARGS_USER "") +set (CUDA_NVCC_INCLUDE_DIRS_USER "") set (CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) macro(cuda_find_library_local_first_with_path_ext _var _names _doc _path_ext ) @@ -608,19 +753,21 @@ macro(cuda_find_library_local_first_with_path_ext _var _names _doc _path_ext ) # (lib/Win32) and the old path (lib). find_library(${_var} NAMES ${_names} - PATHS "${CUDA_TOOLKIT_ROOT_DIR}" + PATHS "${CUDA_TOOLKIT_TARGET_DIR}" ENV CUDA_PATH ENV CUDA_LIB_PATH PATH_SUFFIXES ${_cuda_64bit_lib_dir} "${_path_ext}lib/Win32" "${_path_ext}lib" "${_path_ext}libWin32" DOC ${_doc} NO_DEFAULT_PATH ) - # Search default search paths, after we search our own set of paths. - find_library(${_var} - NAMES ${_names} - PATHS "/usr/lib/nvidia-current" - DOC ${_doc} - ) + if (NOT CMAKE_CROSSCOMPILING) + # Search default search paths, after we search our own set of paths. + find_library(${_var} + NAMES ${_names} + PATHS "/usr/lib/nvidia-current" + DOC ${_doc} + ) + endif() endmacro() macro(cuda_find_library_local_first _var _names _doc) @@ -642,30 +789,89 @@ if(CUDA_VERSION VERSION_EQUAL "3.0") ) endif() +if(NOT CUDA_VERSION VERSION_LESS "5.5") + cuda_find_library_local_first(CUDA_cudart_static_LIBRARY cudart_static "static CUDA runtime library") + mark_as_advanced(CUDA_cudart_static_LIBRARY) +endif() + + +if(CUDA_cudart_static_LIBRARY) + # If static cudart available, use it by default, but provide a user-visible option to disable it. + option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" ON) + set(CUDA_CUDART_LIBRARY_VAR CUDA_cudart_static_LIBRARY) +else() + # If not available, silently disable the option. + set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "") + set(CUDA_CUDART_LIBRARY_VAR CUDA_CUDART_LIBRARY) +endif() +if(NOT CUDA_VERSION VERSION_LESS "5.0") + cuda_find_library_local_first(CUDA_cudadevrt_LIBRARY cudadevrt "\"cudadevrt\" library") + mark_as_advanced(CUDA_cudadevrt_LIBRARY) +endif() + +if(CUDA_USE_STATIC_CUDA_RUNTIME) + if(UNIX) + # Check for the dependent libraries. Here we look for pthreads. + if (DEFINED CMAKE_THREAD_PREFER_PTHREAD) + set(_cuda_cmake_thread_prefer_pthread ${CMAKE_THREAD_PREFER_PTHREAD}) + endif() + set(CMAKE_THREAD_PREFER_PTHREAD 1) + + # Many of the FindXYZ CMake comes with makes use of try_compile with int main(){return 0;} + # as the source file. Unfortunately this causes a warning with -Wstrict-prototypes and + # -Werror causes the try_compile to fail. We will just temporarily disable other flags + # when doing the find_package command here. + set(_cuda_cmake_c_flags ${CMAKE_C_FLAGS}) + set(CMAKE_C_FLAGS "-fPIC") + find_package(Threads REQUIRED) + set(CMAKE_C_FLAGS ${_cuda_cmake_c_flags}) + + if (DEFINED _cuda_cmake_thread_prefer_pthread) + set(CMAKE_THREAD_PREFER_PTHREAD ${_cuda_cmake_thread_prefer_pthread}) + unset(_cuda_cmake_thread_prefer_pthread) + else() + unset(CMAKE_THREAD_PREFER_PTHREAD) + endif() + + if(NOT APPLE) + #On Linux, you must link against librt when using the static cuda runtime. + find_library(CUDA_rt_LIBRARY rt) + if (NOT CUDA_rt_LIBRARY) + message(WARNING "Expecting to find librt for libcudart_static, but didn't find it.") + endif() + endif() + endif() +endif() + # CUPTI library showed up in cuda toolkit 4.0 if(NOT CUDA_VERSION VERSION_LESS "4.0") cuda_find_library_local_first_with_path_ext(CUDA_cupti_LIBRARY cupti "\"cupti\" library" "extras/CUPTI/") mark_as_advanced(CUDA_cupti_LIBRARY) endif() +# Set the CUDA_LIBRARIES variable. This is the set of stuff to link against if you are +# using the CUDA runtime. For the dynamic version of the runtime, most of the +# dependencies are brough in, but for the static version there are additional libraries +# and linker commands needed. +# Initialize to empty +set(CUDA_LIBRARIES) + # If we are using emulation mode and we found the cudartemu library then use # that one instead of cudart. if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY) - set(CUDA_LIBRARIES ${CUDA_CUDARTEMU_LIBRARY}) -else() - set(CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY}) -endif() -if(APPLE) - # We need to add the path to cudart to the linker using rpath, since the - # library name for the cuda libraries is prepended with @rpath. - if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY) - get_filename_component(_cuda_path_to_cudart "${CUDA_CUDARTEMU_LIBRARY}" PATH) - else() - get_filename_component(_cuda_path_to_cudart "${CUDA_CUDART_LIBRARY}" PATH) + list(APPEND CUDA_LIBRARIES ${CUDA_CUDARTEMU_LIBRARY}) +elseif(CUDA_USE_STATIC_CUDA_RUNTIME AND CUDA_cudart_static_LIBRARY) + list(APPEND CUDA_LIBRARIES ${CUDA_cudart_static_LIBRARY} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS}) + if (CUDA_rt_LIBRARY) + list(APPEND CUDA_LIBRARIES ${CUDA_rt_LIBRARY}) endif() - if(_cuda_path_to_cudart) - list(APPEND CUDA_LIBRARIES -Wl,-rpath "-Wl,${_cuda_path_to_cudart}") + if(APPLE) + # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that + # the static cuda runtime can find it at runtime. + list(APPEND CUDA_LIBRARIES -Wl,-rpath,/usr/local/cuda/lib) endif() +else() + list(APPEND CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY}) endif() # 1.1 toolkit on linux doesn't appear to have a separate library on @@ -710,6 +916,7 @@ if(NOT CUDA_VERSION VERSION_LESS "3.2") endif() endif() if(CUDA_VERSION VERSION_GREATER "5.0") + find_cuda_helper_libs(cublas_device) # In CUDA 5.5 NPP was splitted onto 3 separate libraries. find_cuda_helper_libs(nppc) find_cuda_helper_libs(nppi) @@ -718,13 +925,17 @@ if(CUDA_VERSION VERSION_GREATER "5.0") elseif(NOT CUDA_VERSION VERSION_LESS "4.0") find_cuda_helper_libs(npp) endif() +if(NOT CUDA_VERSION VERSION_LESS "7.0") + # cusolver showed up in version 7.0 + find_cuda_helper_libs(cusolver) +endif() if (CUDA_BUILD_EMULATION) set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY}) set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY}) else() set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY}) - set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY}) + set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY} ${CUDA_cublas_device_LIBRARY}) endif() ######################## @@ -799,16 +1010,19 @@ set(CUDA_FOUND TRUE) set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL "${CUDA_TOOLKIT_ROOT_DIR}" CACHE INTERNAL "This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully." FORCE) +set(CUDA_TOOLKIT_TARGET_DIR_INTERNAL "${CUDA_TOOLKIT_TARGET_DIR}" CACHE INTERNAL + "This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was set successfully." FORCE) set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE) include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) + find_package_handle_standard_args(CUDA REQUIRED_VARS CUDA_TOOLKIT_ROOT_DIR CUDA_NVCC_EXECUTABLE CUDA_INCLUDE_DIRS - CUDA_CUDART_LIBRARY + ${CUDA_CUDART_LIBRARY_VAR} VERSION_VAR CUDA_VERSION ) @@ -825,7 +1039,7 @@ find_package_handle_standard_args(CUDA # Add include directories to pass to the nvcc command. macro(CUDA_INCLUDE_DIRECTORIES) foreach(dir ${ARGN}) - list(APPEND CUDA_NVCC_INCLUDE_ARGS_USER -I${dir}) + list(APPEND CUDA_NVCC_INCLUDE_DIRS_USER ${dir}) endforeach() endmacro() @@ -834,6 +1048,7 @@ endmacro() cuda_find_helper_file(parse_cubin cmake) cuda_find_helper_file(make2cmake cmake) cuda_find_helper_file(run_nvcc cmake) +include("${CMAKE_CURRENT_LIST_DIR}/FindCUDA/select_compute_arch.cmake") ############################################################################## # Separate the OPTIONS out from the sources @@ -844,15 +1059,15 @@ macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _cmake_options _options) set( ${_options} ) set( _found_options FALSE ) foreach(arg ${ARGN}) - if(arg STREQUAL "OPTIONS") + if("x${arg}" STREQUAL "xOPTIONS") set( _found_options TRUE ) elseif( - arg STREQUAL "WIN32" OR - arg STREQUAL "MACOSX_BUNDLE" OR - arg STREQUAL "EXCLUDE_FROM_ALL" OR - arg STREQUAL "STATIC" OR - arg STREQUAL "SHARED" OR - arg STREQUAL "MODULE" + "x${arg}" STREQUAL "xWIN32" OR + "x${arg}" STREQUAL "xMACOSX_BUNDLE" OR + "x${arg}" STREQUAL "xEXCLUDE_FROM_ALL" OR + "x${arg}" STREQUAL "xSTATIC" OR + "x${arg}" STREQUAL "xSHARED" OR + "x${arg}" STREQUAL "xMODULE" ) list(APPEND ${_cmake_options} ${arg}) else() @@ -948,7 +1163,7 @@ function(CUDA_COMPUTE_BUILD_PATH path build_path) endif() endif() - # This recipie is from cmLocalGenerator::CreateSafeUniqueObjectFileName in the + # This recipe is from cmLocalGenerator::CreateSafeUniqueObjectFileName in the # CMake source. # Remove leading / @@ -977,7 +1192,7 @@ endfunction() # a .cpp or .ptx file. # INPUT: # cuda_target - Target name -# format - PTX or OBJ +# format - PTX, CUBIN, FATBIN or OBJ # FILE1 .. FILEN - The remaining arguments are the sources to be wrapped. # OPTIONS - Extra options to NVCC # OUTPUT: @@ -987,6 +1202,18 @@ endfunction() macro(CUDA_WRAP_SRCS cuda_target format generated_files) + # Put optional arguments in list. + set(_argn_list "${ARGN}") + # If one of the given optional arguments is "PHONY", make a note of it, then + # remove it from the list. + list(FIND _argn_list "PHONY" _phony_idx) + if("${_phony_idx}" GREATER "-1") + set(_target_is_phony true) + list(REMOVE_AT _argn_list ${_phony_idx}) + else() + set(_target_is_phony false) + endif() + # If CMake doesn't support separable compilation, complain if(CUDA_SEPARABLE_COMPILATION AND CMAKE_VERSION VERSION_LESS "2.8.10.1") message(SEND_ERROR "CUDA_SEPARABLE_COMPILATION isn't supported for CMake versions less than 2.8.10.1") @@ -1023,6 +1250,10 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) set(nvcc_flags ${nvcc_flags} -m32) endif() + if(CUDA_TARGET_CPU_ARCH) + set(nvcc_flags ${nvcc_flags} "--target-cpu-architecture=${CUDA_TARGET_CPU_ARCH}") + endif() + # This needs to be passed in at this stage, because VS needs to fill out the # value of VCInstallDir from within VS. Note that CCBIN is only used if # -ccbin or --compiler-bindir isn't used and CUDA_HOST_COMPILER matches @@ -1044,18 +1275,27 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) endif() # Initialize our list of includes with the user ones followed by the CUDA system ones. - set(CUDA_NVCC_INCLUDE_ARGS ${CUDA_NVCC_INCLUDE_ARGS_USER} "-I${CUDA_INCLUDE_DIRS}") - # Get the include directories for this directory and use them for our nvcc command. - # Remove duplicate entries which may be present since include_directories - # in CMake >= 2.8.8 does not remove them. - get_directory_property(CUDA_NVCC_INCLUDE_DIRECTORIES INCLUDE_DIRECTORIES) - list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRECTORIES) - if(CUDA_NVCC_INCLUDE_DIRECTORIES) - foreach(dir ${CUDA_NVCC_INCLUDE_DIRECTORIES}) - list(APPEND CUDA_NVCC_INCLUDE_ARGS -I${dir}) - endforeach() + set(CUDA_NVCC_INCLUDE_DIRS ${CUDA_NVCC_INCLUDE_DIRS_USER} "${CUDA_INCLUDE_DIRS}") + if(_target_is_phony) + # If the passed in target name isn't a real target (i.e., this is from a call to one of the + # cuda_compile_* functions), need to query directory properties to get include directories + # and compile definitions. + get_directory_property(_dir_include_dirs INCLUDE_DIRECTORIES) + get_directory_property(_dir_compile_defs COMPILE_DEFINITIONS) + + list(APPEND CUDA_NVCC_INCLUDE_DIRS "${_dir_include_dirs}") + set(CUDA_NVCC_COMPILE_DEFINITIONS "${_dir_compile_defs}") + else() + # Append the include directories for this target via generator expression, which is + # expanded by the FILE(GENERATE) call below. This generator expression captures all + # include dirs set by the user, whether via directory properties or target properties + list(APPEND CUDA_NVCC_INCLUDE_DIRS "$<TARGET_PROPERTY:${cuda_target},INCLUDE_DIRECTORIES>") + + # Do the same thing with compile definitions + set(CUDA_NVCC_COMPILE_DEFINITIONS "$<TARGET_PROPERTY:${cuda_target},COMPILE_DEFINITIONS>") endif() + # Reset these variables set(CUDA_WRAP_OPTION_NVCC_FLAGS) foreach(config ${CUDA_configuration_types}) @@ -1063,7 +1303,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}) endforeach() - CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${ARGN}) + CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${_argn_list}) CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options}) # Figure out if we are building a shared library. BUILD_SHARED_LIBS is @@ -1122,21 +1362,26 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") endif() - set(_cuda_host_flags "${_cuda_host_flags}\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})") + string(APPEND _cuda_host_flags "\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})") endif() # Note that if we ever want CUDA_NVCC_FLAGS_<CONFIG> to be string (instead of a list # like it is currently), we can remove the quotes around the # ${CUDA_NVCC_FLAGS_${config_upper}} variable like the CMAKE_HOST_FLAGS_<CONFIG> variable. - set(_cuda_nvcc_flags_config "${_cuda_nvcc_flags_config}\nset(CUDA_NVCC_FLAGS_${config_upper} ${CUDA_NVCC_FLAGS_${config_upper}} ;; ${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}})") + string(APPEND _cuda_nvcc_flags_config "\nset(CUDA_NVCC_FLAGS_${config_upper} ${CUDA_NVCC_FLAGS_${config_upper}} ;; ${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}})") endforeach() - # Get the list of definitions from the directory property - get_directory_property(CUDA_NVCC_DEFINITIONS COMPILE_DEFINITIONS) - if(CUDA_NVCC_DEFINITIONS) - foreach(_definition ${CUDA_NVCC_DEFINITIONS}) - list(APPEND nvcc_flags "-D${_definition}") - endforeach() + # Process the C++11 flag. If the host sets the flag, we need to add it to nvcc and + # remove it from the host. This is because -Xcompile -std=c++ will choke nvcc (it uses + # the C preprocessor). In order to get this to work correctly, we need to use nvcc's + # specific c++11 flag. + if( "${_cuda_host_flags}" MATCHES "-std=c\\+\\+11") + # Add the c++11 flag to nvcc if it isn't already present. Note that we only look at + # the main flag instead of the configuration specific flags. + if( NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std;c\\+\\+11" ) + list(APPEND nvcc_flags --std c++11) + endif() + string(REGEX REPLACE "[-]+std=c\\+\\+11" "" _cuda_host_flags "${_cuda_host_flags}") endif() if(_cuda_build_shared_libs) @@ -1148,27 +1393,39 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) # Iterate over the macro arguments and create custom # commands for all the .cu files. - foreach(file ${ARGN}) + foreach(file ${_argn_list}) # Ignore any file marked as a HEADER_FILE_ONLY get_source_file_property(_is_header ${file} HEADER_FILE_ONLY) - if(${file} MATCHES ".*\\.cu$" AND NOT _is_header) + # Allow per source file overrides of the format. Also allows compiling non-.cu files. + get_source_file_property(_cuda_source_format ${file} CUDA_SOURCE_PROPERTY_FORMAT) + if((${file} MATCHES "\\.cu$" OR _cuda_source_format) AND NOT _is_header) - # Allow per source file overrides of the format. - get_source_file_property(_cuda_source_format ${file} CUDA_SOURCE_PROPERTY_FORMAT) if(NOT _cuda_source_format) set(_cuda_source_format ${format}) endif() - - if( ${_cuda_source_format} MATCHES "PTX" ) - set( compile_to_ptx ON ) - elseif( ${_cuda_source_format} MATCHES "OBJ") - set( compile_to_ptx OFF ) + # If file isn't a .cu file, we need to tell nvcc to treat it as such. + if(NOT ${file} MATCHES "\\.cu$") + set(cuda_language_flag -x=cu) else() - message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS for file '${file}': '${_cuda_source_format}'. Use OBJ or PTX.") + set(cuda_language_flag) endif() + if( ${_cuda_source_format} MATCHES "OBJ") + set( cuda_compile_to_external_module OFF ) + else() + set( cuda_compile_to_external_module ON ) + if( ${_cuda_source_format} MATCHES "PTX" ) + set( cuda_compile_to_external_module_type "ptx" ) + elseif( ${_cuda_source_format} MATCHES "CUBIN") + set( cuda_compile_to_external_module_type "cubin" ) + elseif( ${_cuda_source_format} MATCHES "FATBIN") + set( cuda_compile_to_external_module_type "fatbin" ) + else() + message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS or set with CUDA_SOURCE_PROPERTY_FORMAT file property for file '${file}': '${_cuda_source_format}'. Use OBJ, PTX, CUBIN or FATBIN.") + endif() + endif() - if(compile_to_ptx) + if(cuda_compile_to_external_module) # Don't use any of the host compilation flags for PTX targets. set(CUDA_HOST_FLAGS) set(CUDA_NVCC_FLAGS_CONFIG) @@ -1183,7 +1440,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) if(CUDA_GENERATED_OUTPUT_DIR) set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}") else() - if ( compile_to_ptx ) + if ( cuda_compile_to_external_module ) set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}") else() set(cuda_compile_output_dir "${cuda_compile_intermediate_directory}") @@ -1193,10 +1450,10 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) # Add a custom target to generate a c or ptx file. ###################### get_filename_component( basename ${file} NAME ) - if( compile_to_ptx ) + if( cuda_compile_to_external_module ) set(generated_file_path "${cuda_compile_output_dir}") - set(generated_file_basename "${cuda_target}_generated_${basename}.ptx") - set(format_flag "-ptx") + set(generated_file_basename "${cuda_target}_generated_${basename}.${cuda_compile_to_external_module_type}") + set(format_flag "-${cuda_compile_to_external_module_type}") file(MAKE_DIRECTORY "${cuda_compile_output_dir}") else() set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}") @@ -1216,10 +1473,11 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) set(cmake_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.depend") set(NVCC_generated_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.NVCC-depend") set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt") - set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake") + set(custom_target_script_pregen "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake.pre-gen") + set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}$<$<BOOL:$<CONFIG>>:.$<CONFIG>>.cmake") # Setup properties for obj files: - if( NOT compile_to_ptx ) + if( NOT cuda_compile_to_external_module ) set_source_files_properties("${generated_file}" PROPERTIES EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked. @@ -1234,7 +1492,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}") endif() - if( NOT compile_to_ptx AND CUDA_SEPARABLE_COMPILATION) + if( NOT cuda_compile_to_external_module AND CUDA_SEPARABLE_COMPILATION) list(APPEND ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS "${generated_file}") endif() @@ -1251,13 +1509,17 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) # Build the NVCC made dependency file ################################### set(build_cubin OFF) if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN ) - if ( NOT compile_to_ptx ) + if ( NOT cuda_compile_to_external_module ) set ( build_cubin ON ) endif() endif() # Configure the build script - configure_file("${CUDA_run_nvcc}" "${custom_target_script}" @ONLY) + configure_file("${CUDA_run_nvcc}" "${custom_target_script_pregen}" @ONLY) + file(GENERATE + OUTPUT "${custom_target_script}" + INPUT "${custom_target_script_pregen}" + ) # So if a user specifies the same cuda file as input more than once, you # can have bad things happen with dependencies. Here we check an option @@ -1278,12 +1540,17 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) # Create up the comment string file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") - if(compile_to_ptx) - set(cuda_build_comment_string "Building NVCC ptx file ${generated_file_relative_path}") + if(cuda_compile_to_external_module) + set(cuda_build_comment_string "Building NVCC ${cuda_compile_to_external_module_type} file ${generated_file_relative_path}") else() set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}") endif() + set(_verbatim VERBATIM) + if(ccbin_flags MATCHES "\\$\\(VCInstallDir\\)") + set(_verbatim "") + endif() + # Build the generated file and dependency file ########################## add_custom_command( OUTPUT ${generated_file} @@ -1302,6 +1569,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) -P "${custom_target_script}" WORKING_DIRECTORY "${cuda_compile_intermediate_directory}" COMMENT "${cuda_build_comment_string}" + ${_verbatim} ) # Make sure the build system knows the file is generated. @@ -1323,10 +1591,10 @@ endmacro() function(_cuda_get_important_host_flags important_flags flag_string) if(CMAKE_GENERATOR MATCHES "Visual Studio") - string(REGEX MATCHALL "/M[DT][d]?" flags ${flag_string}) + string(REGEX MATCHALL "/M[DT][d]?" flags "${flag_string}") list(APPEND ${important_flags} ${flags}) else() - string(REGEX MATCHALL "-fPIC" flags ${flag_string}) + string(REGEX MATCHALL "-fPIC" flags "${flag_string}") list(APPEND ${important_flags} ${flags}) endif() set(${important_flags} ${${important_flags}} PARENT_SCOPE) @@ -1372,18 +1640,40 @@ function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options # If -ccbin, --compiler-bindir has been specified, don't do anything. Otherwise add it here. list( FIND nvcc_flags "-ccbin" ccbin_found0 ) list( FIND nvcc_flags "--compiler-bindir" ccbin_found1 ) - if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 ) - list(APPEND nvcc_flags -ccbin "\"${CUDA_HOST_COMPILER}\"") + if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) + # Match VERBATIM check below. + if(CUDA_HOST_COMPILER MATCHES "\\$\\(VCInstallDir\\)") + list(APPEND nvcc_flags -ccbin "\"${CUDA_HOST_COMPILER}\"") + else() + list(APPEND nvcc_flags -ccbin "${CUDA_HOST_COMPILER}") + endif() endif() + + # Create a list of flags specified by CUDA_NVCC_FLAGS_${CONFIG} and CMAKE_${CUDA_C_OR_CXX}_FLAGS* + set(config_specific_flags) set(flags) foreach(config ${CUDA_configuration_types}) string(TOUPPER ${config} config_upper) + # Add config specific flags + foreach(f ${CUDA_NVCC_FLAGS_${config_upper}}) + list(APPEND config_specific_flags $<$<CONFIG:${config}>:${f}>) + endforeach() set(important_host_flags) - _cuda_get_important_host_flags(important_host_flags ${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}) + _cuda_get_important_host_flags(important_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}") foreach(f ${important_host_flags}) list(APPEND flags $<$<CONFIG:${config}>:-Xcompiler> $<$<CONFIG:${config}>:${f}>) endforeach() endforeach() + # Add CMAKE_${CUDA_C_OR_CXX}_FLAGS + set(important_host_flags) + _cuda_get_important_host_flags(important_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS}") + foreach(f ${important_host_flags}) + list(APPEND flags -Xcompiler ${f}) + endforeach() + + # Add our general CUDA_NVCC_FLAGS with the configuration specifig flags + set(nvcc_flags ${CUDA_NVCC_FLAGS} ${config_specific_flags} ${nvcc_flags}) + file(RELATIVE_PATH output_file_relative_path "${CMAKE_BINARY_DIR}" "${output_file}") # Some generators don't handle the multiple levels of custom command @@ -1391,12 +1681,16 @@ function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options # we work around that issue by compiling the intermediate link object as a # pre-link custom command in that situation. set(do_obj_build_rule TRUE) - if (MSVC_VERSION GREATER 1599) - # VS 2010 and 2012 have this problem. If future versions fix this issue, - # it should still work, it just won't be as nice as the other method. + if (MSVC_VERSION GREATER 1599 AND MSVC_VERSION LESS 1800) + # VS 2010 and 2012 have this problem. set(do_obj_build_rule FALSE) endif() + set(_verbatim VERBATIM) + if(nvcc_flags MATCHES "\\$\\(VCInstallDir\\)") + set(_verbatim "") + endif() + if (do_obj_build_rule) add_custom_command( OUTPUT ${output_file} @@ -1404,13 +1698,17 @@ function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} -dlink ${object_files} -o ${output_file} ${flags} COMMENT "Building NVCC intermediate link file ${output_file_relative_path}" + ${_verbatim} ) else() + get_filename_component(output_file_dir "${output_file}" DIRECTORY) add_custom_command( TARGET ${cuda_target} PRE_LINK COMMAND ${CMAKE_COMMAND} -E echo "Building NVCC intermediate link file ${output_file_relative_path}" + COMMAND ${CMAKE_COMMAND} -E make_directory "${output_file_dir}" COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} ${flags} -dlink ${object_files} -o "${output_file}" + ${_verbatim} ) endif() endif() @@ -1449,10 +1747,16 @@ macro(CUDA_ADD_LIBRARY cuda_target) # variable will have been defined. CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}") - target_link_libraries(${cuda_target} + target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_LIBRARIES} ) + if(CUDA_SEPARABLE_COMPILATION) + target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD} + ${CUDA_cudadevrt_LIBRARY} + ) + endif() + # We need to set the linker language based on what the expected generated file # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP. set_target_properties(${cuda_target} @@ -1493,7 +1797,7 @@ macro(CUDA_ADD_EXECUTABLE cuda_target) # variable will have been defined. CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}") - target_link_libraries(${cuda_target} + target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_LIBRARIES} ) @@ -1509,21 +1813,40 @@ endmacro() ############################################################################### ############################################################################### -# CUDA COMPILE +# (Internal) helper for manually added cuda source files with specific targets ############################################################################### ############################################################################### -macro(CUDA_COMPILE generated_files) +macro(cuda_compile_base cuda_target format generated_files) + # Update a counter in this directory, to keep phony target names unique. + set(_cuda_target "${cuda_target}") + get_property(_counter DIRECTORY PROPERTY _cuda_internal_phony_counter) + if(_counter) + math(EXPR _counter "${_counter} + 1") + else() + set(_counter 1) + endif() + string(APPEND _cuda_target "_${_counter}") + set_property(DIRECTORY PROPERTY _cuda_internal_phony_counter ${_counter}) # Separate the sources from the options CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) + # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( cuda_compile OBJ _generated_files ${_sources} ${_cmake_options} - OPTIONS ${_options} ) + CUDA_WRAP_SRCS( ${_cuda_target} ${format} _generated_files ${_sources} + ${_cmake_options} OPTIONS ${_options} PHONY) set( ${generated_files} ${_generated_files}) endmacro() +############################################################################### +############################################################################### +# CUDA COMPILE +############################################################################### +############################################################################### +macro(CUDA_COMPILE generated_files) + cuda_compile_base(cuda_compile OBJ ${generated_files} ${ARGN}) +endmacro() ############################################################################### ############################################################################### @@ -1531,17 +1854,28 @@ endmacro() ############################################################################### ############################################################################### macro(CUDA_COMPILE_PTX generated_files) + cuda_compile_base(cuda_compile_ptx PTX ${generated_files} ${ARGN}) +endmacro() - # Separate the sources from the options - CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN}) - # Create custom commands and targets for each file. - CUDA_WRAP_SRCS( cuda_compile_ptx PTX _generated_files ${_sources} ${_cmake_options} - OPTIONS ${_options} ) - - set( ${generated_files} ${_generated_files}) +############################################################################### +############################################################################### +# CUDA COMPILE FATBIN +############################################################################### +############################################################################### +macro(CUDA_COMPILE_FATBIN generated_files) + cuda_compile_base(cuda_compile_fatbin FATBIN ${generated_files} ${ARGN}) +endmacro() +############################################################################### +############################################################################### +# CUDA COMPILE CUBIN +############################################################################### +############################################################################### +macro(CUDA_COMPILE_CUBIN generated_files) + cuda_compile_base(cuda_compile_cubin CUBIN ${generated_files} ${ARGN}) endmacro() + ############################################################################### ############################################################################### # CUDA ADD CUFFT TO TARGET @@ -1549,9 +1883,9 @@ endmacro() ############################################################################### macro(CUDA_ADD_CUFFT_TO_TARGET target) if (CUDA_BUILD_EMULATION) - target_link_libraries(${target} ${CUDA_cufftemu_LIBRARY}) + target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cufftemu_LIBRARY}) else() - target_link_libraries(${target} ${CUDA_cufft_LIBRARY}) + target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cufft_LIBRARY}) endif() endmacro() @@ -1562,9 +1896,9 @@ endmacro() ############################################################################### macro(CUDA_ADD_CUBLAS_TO_TARGET target) if (CUDA_BUILD_EMULATION) - target_link_libraries(${target} ${CUDA_cublasemu_LIBRARY}) + target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cublasemu_LIBRARY}) else() - target_link_libraries(${target} ${CUDA_cublas_LIBRARY}) + target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cublas_LIBRARY} ${CUDA_cublas_device_LIBRARY}) endif() endmacro() |