path: root/Modules/FindCUDA.cmake
Diffstat (limited to 'Modules/FindCUDA.cmake')
-rw-r--r--  Modules/FindCUDA.cmake | 1156
1 file changed, 745 insertions, 411 deletions
diff --git a/Modules/FindCUDA.cmake b/Modules/FindCUDA.cmake
index 2705d3271..a4dca5433 100644
--- a/Modules/FindCUDA.cmake
+++ b/Modules/FindCUDA.cmake
@@ -1,292 +1,351 @@
-# - Tools for building CUDA C files: libraries and build dependencies.
-# This script locates the NVIDIA CUDA C tools. It should work on linux, windows,
-# and mac and should be reasonably up to date with CUDA C releases.
-#
-# This script makes use of the standard find_package arguments of <VERSION>,
-# REQUIRED and QUIET. CUDA_FOUND will report if an acceptable version of CUDA
-# was found.
-#
-# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if the prefix
-# cannot be determined by the location of nvcc in the system path and REQUIRED
-# is specified to find_package(). To use a different installed version of the
-# toolkit set the environment variable CUDA_BIN_PATH before running cmake
-# (e.g. CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default /usr/local/cuda)
-# or set CUDA_TOOLKIT_ROOT_DIR after configuring. If you change the value of
-# CUDA_TOOLKIT_ROOT_DIR, various components that depend on the path will be
-# relocated.
+#.rst:
+# FindCUDA
+# --------
+#
+# Tools for building CUDA C files: libraries and build dependencies.
+#
+# This script locates the NVIDIA CUDA C tools. It should work on linux,
+# windows, and mac and should be reasonably up to date with CUDA C
+# releases.
+#
+# This script makes use of the standard find_package arguments of
+# <VERSION>, REQUIRED and QUIET. CUDA_FOUND will report if an
+# acceptable version of CUDA was found.
+#
+# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if
+# the prefix cannot be determined by the location of nvcc in the system
+# path and REQUIRED is specified to find_package(). To use a different
+# installed version of the toolkit set the environment variable
+# CUDA_BIN_PATH before running cmake (e.g.
+# CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default
+# /usr/local/cuda) or set CUDA_TOOLKIT_ROOT_DIR after configuring. If
+# you change the value of CUDA_TOOLKIT_ROOT_DIR, various components that
+# depend on the path will be relocated.
#
# It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain
-# platforms, or to use a cuda runtime not installed in the default location. In
-# newer versions of the toolkit the cuda library is included with the graphics
-# driver- be sure that the driver version matches what is needed by the cuda
-# runtime version.
-#
-# The following variables affect the behavior of the macros in the script (in
-# alphebetical order). Note that any of these flags can be changed multiple
-# times in the same directory before calling CUDA_ADD_EXECUTABLE,
-# CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX or CUDA_WRAP_SRCS.
-#
-# CUDA_64_BIT_DEVICE_CODE (Default matches host bit size)
-# -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code.
-# Note that making this different from the host code when generating object
-# or C files from CUDA code just won't work, because size_t gets defined by
-# nvcc in the generated source. If you compile to PTX and then load the
-# file yourself, you can mix bit sizes between device and host.
-#
-# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON)
-# -- Set to ON if you want the custom build rule to be attached to the source
-# file in Visual Studio. Turn OFF if you add the same cuda file to multiple
-# targets.
-#
-# This allows the user to build the target from the CUDA file; however, bad
-# things can happen if the CUDA source file is added to multiple targets.
-# When performing parallel builds it is possible for the custom build
-# command to be run more than once and in parallel causing cryptic build
-# errors. VS runs the rules for every source file in the target, and a
-# source can have only one rule no matter how many projects it is added to.
-# When the rule is run from multiple targets race conditions can occur on
-# the generated file. Eventually everything will get built, but if the user
-# is unaware of this behavior, there may be confusion. It would be nice if
-# this script could detect the reuse of source files across multiple targets
-# and turn the option off for the user, but no good solution could be found.
-#
-# CUDA_BUILD_CUBIN (Default OFF)
-# -- Set to ON to enable and extra compilation pass with the -cubin option in
-# Device mode. The output is parsed and register, shared memory usage is
-# printed during build.
-#
-# CUDA_BUILD_EMULATION (Default OFF for device mode)
-# -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files
-# when CUDA_BUILD_EMULATION is TRUE.
-#
-# CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR)
-# -- Set to the path you wish to have the generated files placed. If it is
-# blank output files will be placed in CMAKE_CURRENT_BINARY_DIR.
-# Intermediate files will always be placed in
-# CMAKE_CURRENT_BINARY_DIR/CMakeFiles.
-#
-# CUDA_HOST_COMPILATION_CPP (Default ON)
-# -- Set to OFF for C compilation of host code.
-#
-# CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER, $(VCInstallDir)/bin for VS)
-# -- Set the host compiler to be used by nvcc. Ignored if -ccbin or
-# --compiler-bindir is already present in the CUDA_NVCC_FLAGS or
-# CUDA_NVCC_FLAGS_<CONFIG> variables. For Visual Studio targets
-# $(VCInstallDir)/bin is a special value that expands out to the path when
-# the command is run from withing VS.
-#
-# CUDA_NVCC_FLAGS
-# CUDA_NVCC_FLAGS_<CONFIG>
-# -- Additional NVCC command line arguments. NOTE: multiple arguments must be
-# semi-colon delimited (e.g. --compiler-options;-Wall)
-#
-# CUDA_PROPAGATE_HOST_FLAGS (Default ON)
-# -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration
-# dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the
-# host compiler through nvcc's -Xcompiler flag. This helps make the
-# generated host code match the rest of the system better. Sometimes
-# certain flags give nvcc problems, and this will help you turn the flag
-# propagation off. This does not affect the flags supplied directly to nvcc
-# via CUDA_NVCC_FLAGS or through the OPTION flags specified through
-# CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS. Flags used for
-# shared library compilation are not affected by this flag.
-#
-# CUDA_SEPARABLE_COMPILATION (Default OFF)
-# -- If set this will enable separable compilation for all CUDA runtime object
-# files. If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY
-# (e.g. calling CUDA_WRAP_SRCS directly),
-# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and
-# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called.
-#
-# CUDA_VERBOSE_BUILD (Default OFF)
-# -- Set to ON to see all the commands used when building the CUDA file. When
-# using a Makefile generator the value defaults to VERBOSE (run make
-# VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will
-# always print the output.
-#
-# The script creates the following macros (in alphebetical order):
-#
-# CUDA_ADD_CUFFT_TO_TARGET( cuda_target )
-# -- Adds the cufft library to the target (can be any target). Handles whether
-# you are in emulation mode or not.
-#
-# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target )
-# -- Adds the cublas library to the target (can be any target). Handles
-# whether you are in emulation mode or not.
-#
-# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ...
-# [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
-# -- Creates an executable "cuda_target" which is made up of the files
-# specified. All of the non CUDA C files are compiled using the standard
-# build rules specified by CMAKE and the cuda files are compiled to object
-# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is
-# added automatically to include_directories(). Some standard CMake target
-# calls can be used on the target after calling this macro
-# (e.g. set_target_properties and target_link_libraries), but setting
-# properties that adjust compilation flags will not affect code compiled by
-# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE,
-# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS.
-#
-# CUDA_ADD_LIBRARY( cuda_target file0 file1 ...
-# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
-# -- Same as CUDA_ADD_EXECUTABLE except that a library is created.
-#
-# CUDA_BUILD_CLEAN_TARGET()
-# -- Creates a convience target that deletes all the dependency files
-# generated. You should make clean after running this target to ensure the
-# dependency files get regenerated.
-#
-# CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE]
-# [OPTIONS ...] )
-# -- Returns a list of generated files from the input source files to be used
-# with ADD_LIBRARY or ADD_EXECUTABLE.
-#
-# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] )
-# -- Returns a list of PTX files generated from the input source files.
-#
-# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var
-# cuda_target
-# object_files )
-# -- Compute the name of the intermediate link file used for separable
-# compilation. This file name is typically passed into
-# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS. output_file_var is produced
-# based on cuda_target the list of objects files that need separable
-# compilation as specified by object_files. If the object_files list is
-# empty, then output_file_var will be empty. This function is called
-# automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE. Note that
-# this is a function and not a macro.
-#
-# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... )
-# -- Sets the directories that should be passed to nvcc
-# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu
-# files.
-#
-#
-# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target
-# nvcc_flags object_files)
-#
-# -- Generates the link object required by separable compilation from the given
-# object files. This is called automatically for CUDA_ADD_EXECUTABLE and
-# CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS
-# directly. When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the
-# nvcc_flags passed in are the same as the flags passed in via the OPTIONS
-# argument. The only nvcc flag added automatically is the bitness flag as
-# specified by CUDA_64_BIT_DEVICE_CODE. Note that this is a function
-# instead of a macro.
-#
-# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ...
-# [STATIC | SHARED | MODULE] [OPTIONS ...] )
-# -- This is where all the magic happens. CUDA_ADD_EXECUTABLE,
-# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this
-# function under the hood.
-#
-# Given the list of files (file0 file1 ... fileN) this macro generates
-# custom commands that generate either PTX or linkable objects (use "PTX" or
-# "OBJ" for the format argument to switch). Files that don't end with .cu
-# or have the HEADER_FILE_ONLY property are ignored.
-#
-# The arguments passed in after OPTIONS are extra command line options to
-# give to nvcc. You can also specify per configuration options by
-# specifying the name of the configuration followed by the options. General
-# options must preceed configuration specific options. Not all
-# configurations need to be specified, only the ones provided will be used.
-#
-# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag"
-# DEBUG -g
-# RELEASE --use_fast_math
-# RELWITHDEBINFO --use_fast_math;-g
-# MINSIZEREL --use_fast_math
-#
-# For certain configurations (namely VS generating object files with
-# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will
-# be produced for the given cuda file. This is because when you add the
-# cuda file to Visual Studio it knows that this file produces an object file
-# and will link in the resulting object file automatically.
-#
-# This script will also generate a separate cmake script that is used at
-# build time to invoke nvcc. This is for several reasons.
-#
-# 1. nvcc can return negative numbers as return values which confuses
-# Visual Studio into thinking that the command succeeded. The script now
-# checks the error codes and produces errors when there was a problem.
-#
-# 2. nvcc has been known to not delete incomplete results when it
-# encounters problems. This confuses build systems into thinking the
-# target was generated when in fact an unusable file exists. The script
-# now deletes the output files if there was an error.
-#
-# 3. By putting all the options that affect the build into a file and then
-# make the build rule dependent on the file, the output files will be
-# regenerated when the options change.
-#
-# This script also looks at optional arguments STATIC, SHARED, or MODULE to
-# determine when to target the object compilation for a shared library.
-# BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in
-# CUDA_ADD_LIBRARY. On some systems special flags are added for building
-# objects intended for shared libraries. A preprocessor macro,
-# <target_name>_EXPORTS is defined when a shared library compilation is
-# detected.
-#
-# Flags passed into add_definitions with -D or /D are passed along to nvcc.
-#
-# The script defines the following variables:
-#
-# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc.
-# CUDA_VERSION_MINOR -- The minor version.
-# CUDA_VERSION
-# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR
-#
-# CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set).
-# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the
-# SDK. This script will not directly support finding
-# specific libraries or headers, as that isn't
-# supported by NVIDIA. If you want to change
-# libraries when the path changes see the
-# FindCUDA.cmake script for an example of how to clear
-# these variables. There are also examples of how to
-# use the CUDA_SDK_ROOT_DIR to locate headers or
-# libraries, if you so choose (at your own risk).
-# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically
-# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY.
-# CUDA_LIBRARIES -- Cuda RT library.
-# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT
-# implementation (alternative to:
-# CUDA_ADD_CUFFT_TO_TARGET macro)
-# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS
-# implementation (alterative to:
-# CUDA_ADD_CUBLAS_TO_TARGET macro).
-# CUDA_cupti_LIBRARY -- CUDA Profiling Tools Interface library.
-# Only available for CUDA version 4.0+.
-# CUDA_curand_LIBRARY -- CUDA Random Number Generation library.
-# Only available for CUDA version 3.2+.
-# CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library.
-# Only available for CUDA version 3.2+.
-# CUDA_npp_LIBRARY -- NVIDIA Performance Primitives library.
-# Only available for CUDA version 4.0+.
-# CUDA_nppc_LIBRARY -- NVIDIA Performance Primitives library (core).
-# Only available for CUDA version 5.5+.
-# CUDA_nppi_LIBRARY -- NVIDIA Performance Primitives library (image processing).
-# Only available for CUDA version 5.5+.
-# CUDA_npps_LIBRARY -- NVIDIA Performance Primitives library (signal processing).
-# Only available for CUDA version 5.5+.
-# CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library.
-# Only available for CUDA version 3.2+.
-# Windows only.
-# CUDA_nvcuvid_LIBRARY -- CUDA Video Decoder library.
-# Only available for CUDA version 3.2+.
-# Windows only.
-#
-#
-# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
-# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
-#
-# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
-#
-# Copyright (c) 2007-2009
-# Scientific Computing and Imaging Institute, University of Utah
-#
-# This code is licensed under the MIT License. See the FindCUDA.cmake script
-# for the text of the license.
+# platforms, or to use a cuda runtime not installed in the default
+# location. In newer versions of the toolkit the cuda library is
+# included with the graphics driver -- be sure that the driver version
+# matches what is needed by the cuda runtime version.
+#
+# The following variables affect the behavior of the macros in the
+# script (in alphabetical order).  Note that any of these flags can be
+# changed multiple times in the same directory before calling
+# CUDA_ADD_EXECUTABLE, CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX,
+# CUDA_COMPILE_FATBIN, CUDA_COMPILE_CUBIN or CUDA_WRAP_SRCS::
+#
+# CUDA_64_BIT_DEVICE_CODE (Default matches host bit size)
+# -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code.
+# Note that making this different from the host code when generating object
+# or C files from CUDA code just won't work, because size_t gets defined by
+# nvcc in the generated source. If you compile to PTX and then load the
+# file yourself, you can mix bit sizes between device and host.
+#
+# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON)
+# -- Set to ON if you want the custom build rule to be attached to the source
+# file in Visual Studio. Turn OFF if you add the same cuda file to multiple
+# targets.
+#
+# This allows the user to build the target from the CUDA file; however, bad
+# things can happen if the CUDA source file is added to multiple targets.
+# When performing parallel builds it is possible for the custom build
+# command to be run more than once and in parallel causing cryptic build
+# errors. VS runs the rules for every source file in the target, and a
+# source can have only one rule no matter how many projects it is added to.
+# When the rule is run from multiple targets race conditions can occur on
+# the generated file. Eventually everything will get built, but if the user
+# is unaware of this behavior, there may be confusion. It would be nice if
+# this script could detect the reuse of source files across multiple targets
+# and turn the option off for the user, but no good solution could be found.
+#
+# CUDA_BUILD_CUBIN (Default OFF)
+# -- Set to ON to enable an extra compilation pass with the -cubin option in
+# Device mode.  The output is parsed, and register and shared memory usage
+# are printed during the build.
+#
+# CUDA_BUILD_EMULATION (Default OFF for device mode)
+# -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files
+# when CUDA_BUILD_EMULATION is TRUE.
+#
+# CUDA_LINK_LIBRARIES_KEYWORD (Default "")
+# -- The <PRIVATE|PUBLIC|INTERFACE> keyword to use for internal
+# target_link_libraries calls. The default is to use no keyword which
+# uses the old "plain" form of target_link_libraries.  Note that it matters
+# because whatever is used inside the FindCUDA module must also be used
+# outside - the two forms of target_link_libraries cannot be mixed.
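+#
+# A minimal sketch of how this might be set (assuming the project uses the
+# keyword signature of target_link_libraries elsewhere; target and file
+# names are illustrative):
+#
+#   set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
+#   find_package(CUDA REQUIRED)
+#   cuda_add_library(mylib kernels.cu)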
+#
+# CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR)
+# -- Set to the path you wish to have the generated files placed. If it is
+# blank output files will be placed in CMAKE_CURRENT_BINARY_DIR.
+# Intermediate files will always be placed in
+# CMAKE_CURRENT_BINARY_DIR/CMakeFiles.
+#
+# CUDA_HOST_COMPILATION_CPP (Default ON)
+# -- Set to OFF for C compilation of host code.
+#
+# CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER, $(VCInstallDir)/bin for VS)
+# -- Set the host compiler to be used by nvcc. Ignored if -ccbin or
+# --compiler-bindir is already present in the CUDA_NVCC_FLAGS or
+# CUDA_NVCC_FLAGS_<CONFIG> variables. For Visual Studio targets
+# $(VCInstallDir)/bin is a special value that expands out to the path when
+# the command is run from within VS.
+#
+# CUDA_NVCC_FLAGS
+# CUDA_NVCC_FLAGS_<CONFIG>
+# -- Additional NVCC command line arguments. NOTE: multiple arguments must be
+# semi-colon delimited (e.g. --compiler-options;-Wall)
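+#
+# For example, flags could be appended like this (the particular flags shown
+# are only illustrative):
+#
+#   list(APPEND CUDA_NVCC_FLAGS --compiler-options;-Wall)
+#   list(APPEND CUDA_NVCC_FLAGS_RELEASE --use_fast_math)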
+#
+# CUDA_PROPAGATE_HOST_FLAGS (Default ON)
+# -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration
+# dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the
+# host compiler through nvcc's -Xcompiler flag. This helps make the
+# generated host code match the rest of the system better. Sometimes
+# certain flags give nvcc problems, and this will help you turn the flag
+# propagation off. This does not affect the flags supplied directly to nvcc
+# via CUDA_NVCC_FLAGS or through the OPTION flags specified through
+# CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS. Flags used for
+# shared library compilation are not affected by this flag.
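+#
+# For example, if a host-only flag is known to upset nvcc, propagation could
+# be turned off before wrapping the CUDA sources (names are illustrative):
+#
+#   set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+#   cuda_add_library(mylib kernels.cu)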
+#
+# CUDA_SEPARABLE_COMPILATION (Default OFF)
+# -- If set this will enable separable compilation for all CUDA runtime object
+# files. If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY
+# (e.g. calling CUDA_WRAP_SRCS directly),
+# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and
+# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called.
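+#
+# A typical use with the provided macros might look like this (target and
+# file names are illustrative):
+#
+#   set(CUDA_SEPARABLE_COMPILATION ON)
+#   cuda_add_library(particles STATIC particles.cu integrator.cu)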
+#
+# CUDA_SOURCE_PROPERTY_FORMAT
+# -- If this source file property is set, it can override the format specified
+# to CUDA_WRAP_SRCS (OBJ, PTX, CUBIN, or FATBIN). If an input source file
+# is not a .cu file, setting this property will cause it to be treated as a .cu
+# file. See documentation for set_source_files_properties on how to set
+# this property.
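+#
+# For instance, a .cpp file could be compiled by nvcc as an object file like
+# this (the file name is illustrative):
+#
+#   set_source_files_properties(kernels.cpp PROPERTIES
+#     CUDA_SOURCE_PROPERTY_FORMAT OBJ)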
+#
+# CUDA_USE_STATIC_CUDA_RUNTIME (Default ON)
+# -- When enabled the static version of the CUDA runtime library will be used
+# in CUDA_LIBRARIES. If the version of CUDA configured doesn't support
+# this option, then it will be silently disabled.
+#
+# CUDA_VERBOSE_BUILD (Default OFF)
+# -- Set to ON to see all the commands used when building the CUDA file. When
+# using a Makefile generator the value defaults to VERBOSE (run make
+# VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will
+# always print the output.
+#
+# The script creates the following macros (in alphabetical order)::
+#
+# CUDA_ADD_CUFFT_TO_TARGET( cuda_target )
+# -- Adds the cufft library to the target (can be any target). Handles whether
+# you are in emulation mode or not.
+#
+# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target )
+# -- Adds the cublas library to the target (can be any target). Handles
+# whether you are in emulation mode or not.
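+#
+# A brief sketch of how these macros might be used (the target name is
+# illustrative):
+#
+#   cuda_add_executable(fft_demo fft_demo.cu)
+#   cuda_add_cufft_to_target(fft_demo)
+#   cuda_add_cublas_to_target(fft_demo)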
+#
+# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ...
+# [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
+# -- Creates an executable "cuda_target" which is made up of the files
+# specified. All of the non CUDA C files are compiled using the standard
+# build rules specified by CMAKE and the cuda files are compiled to object
+# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is
+# added automatically to include_directories(). Some standard CMake target
+# calls can be used on the target after calling this macro
+# (e.g. set_target_properties and target_link_libraries), but setting
+# properties that adjust compilation flags will not affect code compiled by
+# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE,
+# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS.
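+#
+# For example (target, files and options are illustrative):
+#
+#   cuda_include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
+#   cuda_add_executable(solver main.cpp kernels.cu OPTIONS -DUSE_DOUBLE)
+#   target_link_libraries(solver ${CUDA_CUBLAS_LIBRARIES})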
+#
+# CUDA_ADD_LIBRARY( cuda_target file0 file1 ...
+# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
+# -- Same as CUDA_ADD_EXECUTABLE except that a library is created.
+#
+# CUDA_BUILD_CLEAN_TARGET()
+# -- Creates a convenience target that deletes all the dependency files
+# generated.  You should run make clean after running this target to ensure the
+# dependency files get regenerated.
+#
+# CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE]
+# [OPTIONS ...] )
+# -- Returns a list of generated files from the input source files to be used
+# with ADD_LIBRARY or ADD_EXECUTABLE.
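+#
+# A minimal sketch (names are illustrative):
+#
+#   cuda_compile(GENERATED_FILES kernels.cu OPTIONS -G)
+#   add_library(mylib STATIC host_code.cpp ${GENERATED_FILES})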
+#
+# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] )
+# -- Returns a list of PTX files generated from the input source files.
+#
+# CUDA_COMPILE_FATBIN( generated_files file0 file1 ... [OPTIONS ...] )
+# -- Returns a list of FATBIN files generated from the input source files.
+#
+# CUDA_COMPILE_CUBIN( generated_files file0 file1 ... [OPTIONS ...] )
+# -- Returns a list of CUBIN files generated from the input source files.
+#
+# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var
+# cuda_target
+# object_files )
+# -- Compute the name of the intermediate link file used for separable
+# compilation. This file name is typically passed into
+# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS. output_file_var is produced
+# based on cuda_target and the list of object files that need separable
+# compilation as specified by object_files. If the object_files list is
+# empty, then output_file_var will be empty. This function is called
+# automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE. Note that
+# this is a function and not a macro.
+#
+# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... )
+# -- Sets the directories that should be passed to nvcc
+# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu
+# files.
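+#
+# For example (the path is illustrative):
+#
+#   cuda_include_directories(${CMAKE_CURRENT_SOURCE_DIR}/cuda_headers)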
+#
+#
+# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target
+# nvcc_flags object_files)
+# -- Generates the link object required by separable compilation from the given
+# object files. This is called automatically for CUDA_ADD_EXECUTABLE and
+# CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS
+# directly. When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the
+# nvcc_flags passed in are the same as the flags passed in via the OPTIONS
+# argument. The only nvcc flag added automatically is the bitness flag as
+# specified by CUDA_64_BIT_DEVICE_CODE. Note that this is a function
+# instead of a macro.
+#
+# CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [target_CUDA_architectures])
+# -- Selects GPU arch flags for nvcc based on target_CUDA_architectures
+# target_CUDA_architectures : Auto | Common | All | LIST(ARCH_AND_PTX ...)
+# - "Auto" detects local machine GPU compute arch at runtime.
+# - "Common" and "All" cover common and entire subsets of architectures
+# ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX
+# NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal
+# NUM: Any number. Only those pairs are currently accepted by NVCC though:
+# 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2
+# Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable}
+# Additionally, sets ${out_variable}_readable to the resulting numeric list
+# Example:
+# CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.0 3.5+PTX 5.2(5.0) Maxwell)
+# LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS})
+#
+# More info on CUDA architectures: https://en.wikipedia.org/wiki/CUDA
+# Note that this is a function instead of a macro.
+#
+# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ...
+# [STATIC | SHARED | MODULE] [OPTIONS ...] )
+# -- This is where all the magic happens. CUDA_ADD_EXECUTABLE,
+# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this
+# function under the hood.
+#
+# Given the list of files (file0 file1 ... fileN) this macro generates
+# custom commands that generate either PTX or linkable objects (use "PTX" or
+# "OBJ" for the format argument to switch). Files that don't end with .cu
+# or have the HEADER_FILE_ONLY property are ignored.
+#
+# The arguments passed in after OPTIONS are extra command line options to
+# give to nvcc. You can also specify per configuration options by
+# specifying the name of the configuration followed by the options. General
+# options must precede configuration specific options. Not all
+# configurations need to be specified, only the ones provided will be used.
+#
+# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag"
+# DEBUG -g
+# RELEASE --use_fast_math
+# RELWITHDEBINFO --use_fast_math;-g
+# MINSIZEREL --use_fast_math
+#
+# For certain configurations (namely VS generating object files with
+# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will
+# be produced for the given cuda file. This is because when you add the
+# cuda file to Visual Studio it knows that this file produces an object file
+# and will link in the resulting object file automatically.
+#
+# This script will also generate a separate cmake script that is used at
+# build time to invoke nvcc. This is for several reasons.
+#
+# 1. nvcc can return negative numbers as return values which confuses
+# Visual Studio into thinking that the command succeeded. The script now
+# checks the error codes and produces errors when there was a problem.
+#
+# 2. nvcc has been known to not delete incomplete results when it
+# encounters problems. This confuses build systems into thinking the
+# target was generated when in fact an unusable file exists. The script
+# now deletes the output files if there was an error.
+#
+# 3. By putting all the options that affect the build into a file and then
+# making the build rule dependent on the file, the output files will be
+# regenerated when the options change.
+#
+# This script also looks at optional arguments STATIC, SHARED, or MODULE to
+# determine when to target the object compilation for a shared library.
+# BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in
+# CUDA_ADD_LIBRARY. On some systems special flags are added for building
+# objects intended for shared libraries. A preprocessor macro,
+# <target_name>_EXPORTS is defined when a shared library compilation is
+# detected.
+#
+# Flags passed into add_definitions with -D or /D are passed along to nvcc.
+#
+# The script defines the following variables::
+#
+# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc.
+# CUDA_VERSION_MINOR -- The minor version.
+# CUDA_VERSION
+# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR
+# CUDA_HAS_FP16 -- Whether a short float (float16,fp16) is supported.
+#
+# CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set).
+# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the
+# SDK. This script will not directly support finding
+# specific libraries or headers, as that isn't
+# supported by NVIDIA. If you want to change
+# libraries when the path changes see the
+# FindCUDA.cmake script for an example of how to clear
+# these variables. There are also examples of how to
+# use the CUDA_SDK_ROOT_DIR to locate headers or
+# libraries, if you so choose (at your own risk).
+# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically
+# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY.
+# CUDA_LIBRARIES -- Cuda RT library.
+# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT
+# implementation (alternative to:
+# CUDA_ADD_CUFFT_TO_TARGET macro)
+# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS
+# implementation (alternative to:
+# CUDA_ADD_CUBLAS_TO_TARGET macro).
+# CUDA_cudart_static_LIBRARY -- Statically linkable cuda runtime library.
+# Only available for CUDA version 5.5+
+# CUDA_cudadevrt_LIBRARY -- Device runtime library.
+# Required for separable compilation.
+# CUDA_cupti_LIBRARY -- CUDA Profiling Tools Interface library.
+# Only available for CUDA version 4.0+.
+# CUDA_curand_LIBRARY -- CUDA Random Number Generation library.
+# Only available for CUDA version 3.2+.
+# CUDA_cusolver_LIBRARY -- CUDA Direct Solver library.
+# Only available for CUDA version 7.0+.
+# CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library.
+# Only available for CUDA version 3.2+.
+# CUDA_npp_LIBRARY -- NVIDIA Performance Primitives lib.
+# Only available for CUDA version 4.0+.
+# CUDA_nppc_LIBRARY -- NVIDIA Performance Primitives lib (core).
+# Only available for CUDA version 5.5+.
+# CUDA_nppi_LIBRARY -- NVIDIA Performance Primitives lib (image processing).
+# Only available for CUDA version 5.5+.
+# CUDA_npps_LIBRARY -- NVIDIA Performance Primitives lib (signal processing).
+# Only available for CUDA version 5.5+.
+# CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library.
+# Only available for CUDA version 3.2+.
+# Windows only.
+# CUDA_nvcuvid_LIBRARY -- CUDA Video Decoder library.
+# Only available for CUDA version 3.2+.
+# Windows only.
+#
+
+# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
+# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
+#
+# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
+#
+# Copyright (c) 2007-2009
+# Scientific Computing and Imaging Institute, University of Utah
+#
+# This code is licensed under the MIT License. See the FindCUDA.cmake script
+# for the text of the license.
# The MIT License
#
@@ -313,11 +372,6 @@
# FindCUDA.cmake
-# We need to have at least this version to support the VERSION_LESS argument to 'if' (2.6.2) and unset (2.6.3)
-cmake_policy(PUSH)
-cmake_minimum_required(VERSION 2.6.3)
-cmake_policy(POP)
-
# This macro helps us find the location of helper files we will need the full path to
macro(CUDA_FIND_HELPER_FILE _name _extension)
set(_full_name "${_name}.${_extension}")
@@ -440,7 +494,31 @@ set(CUDA_NVCC_FLAGS "" CACHE STRING "Semi-colon delimit multiple arguments.")
if(CMAKE_GENERATOR MATCHES "Visual Studio")
set(CUDA_HOST_COMPILER "$(VCInstallDir)bin" CACHE FILEPATH "Host side compiler used by NVCC")
else()
- set(CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}" CACHE FILEPATH "Host side compiler used by NVCC")
+ if(APPLE
+ AND "${CMAKE_C_COMPILER_ID}" MATCHES "Clang"
+ AND "${CMAKE_C_COMPILER}" MATCHES "/cc$")
+ # Using cc which is symlink to clang may let NVCC think it is GCC and issue
+ # unhandled -dumpspecs option to clang. Also in case neither
+ # CMAKE_C_COMPILER is defined (project does not use C language) nor
+ # CUDA_HOST_COMPILER is specified manually we should skip -ccbin and let
+ # nvcc use its own default C compiler.
+ # Only care about this on APPLE with clang to avoid
+ # following symlinks to things like ccache
+ if(DEFINED CMAKE_C_COMPILER AND NOT DEFINED CUDA_HOST_COMPILER)
+ get_filename_component(c_compiler_realpath "${CMAKE_C_COMPILER}" REALPATH)
+ # if the real path does not end up being clang then
+ # go back to using CMAKE_C_COMPILER
+ if(NOT "${c_compiler_realpath}" MATCHES "/clang$")
+ set(c_compiler_realpath "${CMAKE_C_COMPILER}")
+ endif()
+ else()
+ set(c_compiler_realpath "")
+ endif()
+ set(CUDA_HOST_COMPILER "${c_compiler_realpath}" CACHE FILEPATH "Host side compiler used by NVCC")
+ else()
+ set(CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}"
+ CACHE FILEPATH "Host side compiler used by NVCC")
+ endif()
endif()
# Propagate the host flags to the host compiler via -Xcompiler
@@ -459,6 +537,10 @@ mark_as_advanced(
CUDA_HOST_COMPILATION_CPP
CUDA_NVCC_FLAGS
CUDA_PROPAGATE_HOST_FLAGS
+ CUDA_BUILD_CUBIN
+ CUDA_BUILD_EMULATION
+ CUDA_VERBOSE_BUILD
+ CUDA_SEPARABLE_COMPILATION
)
# Makefile and similar generators don't define CMAKE_CONFIGURATION_TYPES, so we
@@ -481,25 +563,25 @@ endforeach()
###############################################################################
###############################################################################
-# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed,
-# if they have then clear the cache variables, so that will be detected again.
-if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}")
- unset(CUDA_NVCC_EXECUTABLE CACHE)
+macro(cuda_unset_include_and_libraries)
unset(CUDA_TOOLKIT_INCLUDE CACHE)
unset(CUDA_CUDART_LIBRARY CACHE)
+ unset(CUDA_CUDA_LIBRARY CACHE)
# Make sure you run this before you unset CUDA_VERSION.
if(CUDA_VERSION VERSION_EQUAL "3.0")
# This only existed in the 3.0 version of the CUDA toolkit
unset(CUDA_CUDARTEMU_LIBRARY CACHE)
endif()
- unset(CUDA_VERSION CACHE)
- unset(CUDA_CUDA_LIBRARY CACHE)
- unset(CUDA_cupti_LIBRARY CACHE)
+ unset(CUDA_cudart_static_LIBRARY CACHE)
+ unset(CUDA_cudadevrt_LIBRARY CACHE)
unset(CUDA_cublas_LIBRARY CACHE)
+ unset(CUDA_cublas_device_LIBRARY CACHE)
unset(CUDA_cublasemu_LIBRARY CACHE)
unset(CUDA_cufft_LIBRARY CACHE)
unset(CUDA_cufftemu_LIBRARY CACHE)
+ unset(CUDA_cupti_LIBRARY CACHE)
unset(CUDA_curand_LIBRARY CACHE)
+ unset(CUDA_cusolver_LIBRARY CACHE)
unset(CUDA_cusparse_LIBRARY CACHE)
unset(CUDA_npp_LIBRARY CACHE)
unset(CUDA_nppc_LIBRARY CACHE)
@@ -507,33 +589,50 @@ if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}")
unset(CUDA_npps_LIBRARY CACHE)
unset(CUDA_nvcuvenc_LIBRARY CACHE)
unset(CUDA_nvcuvid_LIBRARY CACHE)
-endif()
+ unset(CUDA_USE_STATIC_CUDA_RUNTIME CACHE)
+ unset(CUDA_GPU_DETECT_OUTPUT CACHE)
+endmacro()
-if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}")
- # No specific variables to catch. Use this kind of code before calling
- # find_package(CUDA) to clean up any variables that may depend on this path.
+# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed,
+# if they have then clear the cache variables, so that will be detected again.
+if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}")
+ unset(CUDA_TOOLKIT_TARGET_DIR CACHE)
+ unset(CUDA_NVCC_EXECUTABLE CACHE)
+ cuda_unset_include_and_libraries()
+ unset(CUDA_VERSION CACHE)
+endif()
- # unset(MY_SPECIAL_CUDA_SDK_INCLUDE_DIR CACHE)
- # unset(MY_SPECIAL_CUDA_SDK_LIBRARY CACHE)
+if(NOT "${CUDA_TOOLKIT_TARGET_DIR}" STREQUAL "${CUDA_TOOLKIT_TARGET_DIR_INTERNAL}")
+ cuda_unset_include_and_libraries()
endif()
-# Search for the cuda distribution.
-if(NOT CUDA_TOOLKIT_ROOT_DIR)
+#
+# End of unset()
+#
+
+#
+# Start looking for things
+#
+# Search for the cuda distribution.
+if(NOT CUDA_TOOLKIT_ROOT_DIR AND NOT CMAKE_CROSSCOMPILING)
# Search in the CUDA_BIN_PATH first.
find_path(CUDA_TOOLKIT_ROOT_DIR
NAMES nvcc nvcc.exe
PATHS
+ ENV CUDA_TOOLKIT_ROOT
ENV CUDA_PATH
ENV CUDA_BIN_PATH
PATH_SUFFIXES bin bin64
DOC "Toolkit location."
NO_DEFAULT_PATH
)
+
# Now search default paths
find_path(CUDA_TOOLKIT_ROOT_DIR
NAMES nvcc nvcc.exe
- PATHS /usr/local/bin
+ PATHS /opt/cuda/bin
+ /usr/local/bin
/usr/local/cuda/bin
DOC "Toolkit location."
)
@@ -542,7 +641,9 @@ if(NOT CUDA_TOOLKIT_ROOT_DIR)
string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR})
# We need to force this back into the cache.
set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR} CACHE PATH "Toolkit location." FORCE)
+ set(CUDA_TOOLKIT_TARGET_DIR ${CUDA_TOOLKIT_ROOT_DIR})
endif()
+
if (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR})
if(CUDA_FIND_REQUIRED)
message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR")
@@ -552,8 +653,45 @@ if(NOT CUDA_TOOLKIT_ROOT_DIR)
endif ()
endif ()
+if(CMAKE_CROSSCOMPILING)
+ SET (CUDA_TOOLKIT_ROOT $ENV{CUDA_TOOLKIT_ROOT})
+ if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
+ # Support for NVPACK
+ set (CUDA_TOOLKIT_TARGET_NAME "armv7-linux-androideabi")
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
+ # Support for arm cross compilation
+ set(CUDA_TOOLKIT_TARGET_NAME "armv7-linux-gnueabihf")
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+ # Support for aarch64 cross compilation
+ if (ANDROID_ARCH_NAME STREQUAL "arm64")
+ set(CUDA_TOOLKIT_TARGET_NAME "aarch64-linux-androideabi")
+ else()
+ set(CUDA_TOOLKIT_TARGET_NAME "aarch64-linux")
+ endif (ANDROID_ARCH_NAME STREQUAL "arm64")
+ endif()
+
+ if (EXISTS "${CUDA_TOOLKIT_ROOT}/targets/${CUDA_TOOLKIT_TARGET_NAME}")
+ set(CUDA_TOOLKIT_TARGET_DIR "${CUDA_TOOLKIT_ROOT}/targets/${CUDA_TOOLKIT_TARGET_NAME}" CACHE PATH "CUDA Toolkit target location.")
+ SET (CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT})
+ mark_as_advanced(CUDA_TOOLKIT_TARGET_DIR)
+ endif()
+
+  # add known CUDA target root path to the set of directories we search for programs, libraries and headers
+ set( CMAKE_FIND_ROOT_PATH "${CUDA_TOOLKIT_TARGET_DIR};${CMAKE_FIND_ROOT_PATH}")
+ macro( cuda_find_host_program )
+ find_host_program( ${ARGN} )
+ endmacro()
+else()
+ # for non-cross-compile, find_host_program == find_program and CUDA_TOOLKIT_TARGET_DIR == CUDA_TOOLKIT_ROOT_DIR
+ macro( cuda_find_host_program )
+ find_program( ${ARGN} )
+ endmacro()
+ SET (CUDA_TOOLKIT_TARGET_DIR ${CUDA_TOOLKIT_ROOT_DIR})
+endif()
+
+
# CUDA_NVCC_EXECUTABLE
-find_program(CUDA_NVCC_EXECUTABLE
+cuda_find_host_program(CUDA_NVCC_EXECUTABLE
NAMES nvcc
PATHS "${CUDA_TOOLKIT_ROOT_DIR}"
ENV CUDA_PATH
@@ -562,7 +700,7 @@ find_program(CUDA_NVCC_EXECUTABLE
NO_DEFAULT_PATH
)
# Search default search paths, after we search our own set of paths.
-find_program(CUDA_NVCC_EXECUTABLE nvcc)
+cuda_find_host_program(CUDA_NVCC_EXECUTABLE nvcc)
mark_as_advanced(CUDA_NVCC_EXECUTABLE)
if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION)
@@ -578,13 +716,14 @@ else()
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${CUDA_VERSION}")
endif()
+
# Always set this convenience variable
set(CUDA_VERSION_STRING "${CUDA_VERSION}")
# CUDA_TOOLKIT_INCLUDE
find_path(CUDA_TOOLKIT_INCLUDE
device_functions.h # Header included in toolkit
- PATHS "${CUDA_TOOLKIT_ROOT_DIR}"
+ PATHS ${CUDA_TOOLKIT_TARGET_DIR}
ENV CUDA_PATH
ENV CUDA_INC_PATH
PATH_SUFFIXES include
@@ -594,8 +733,14 @@ find_path(CUDA_TOOLKIT_INCLUDE
find_path(CUDA_TOOLKIT_INCLUDE device_functions.h)
mark_as_advanced(CUDA_TOOLKIT_INCLUDE)
+if (CUDA_VERSION VERSION_GREATER "7.0" OR EXISTS "${CUDA_TOOLKIT_INCLUDE}/cuda_fp16.h")
+ set(CUDA_HAS_FP16 TRUE)
+else()
+ set(CUDA_HAS_FP16 FALSE)
+endif()
+
# Set the user list of include dir to nothing to initialize it.
-set (CUDA_NVCC_INCLUDE_ARGS_USER "")
+set (CUDA_NVCC_INCLUDE_DIRS_USER "")
set (CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})
macro(cuda_find_library_local_first_with_path_ext _var _names _doc _path_ext )
@@ -608,19 +753,21 @@ macro(cuda_find_library_local_first_with_path_ext _var _names _doc _path_ext )
# (lib/Win32) and the old path (lib).
find_library(${_var}
NAMES ${_names}
- PATHS "${CUDA_TOOLKIT_ROOT_DIR}"
+ PATHS "${CUDA_TOOLKIT_TARGET_DIR}"
ENV CUDA_PATH
ENV CUDA_LIB_PATH
PATH_SUFFIXES ${_cuda_64bit_lib_dir} "${_path_ext}lib/Win32" "${_path_ext}lib" "${_path_ext}libWin32"
DOC ${_doc}
NO_DEFAULT_PATH
)
- # Search default search paths, after we search our own set of paths.
- find_library(${_var}
- NAMES ${_names}
- PATHS "/usr/lib/nvidia-current"
- DOC ${_doc}
- )
+ if (NOT CMAKE_CROSSCOMPILING)
+ # Search default search paths, after we search our own set of paths.
+ find_library(${_var}
+ NAMES ${_names}
+ PATHS "/usr/lib/nvidia-current"
+ DOC ${_doc}
+ )
+ endif()
endmacro()
macro(cuda_find_library_local_first _var _names _doc)
@@ -642,30 +789,89 @@ if(CUDA_VERSION VERSION_EQUAL "3.0")
)
endif()
+if(NOT CUDA_VERSION VERSION_LESS "5.5")
+ cuda_find_library_local_first(CUDA_cudart_static_LIBRARY cudart_static "static CUDA runtime library")
+ mark_as_advanced(CUDA_cudart_static_LIBRARY)
+endif()
+
+
+if(CUDA_cudart_static_LIBRARY)
+ # If static cudart available, use it by default, but provide a user-visible option to disable it.
+ option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" ON)
+ set(CUDA_CUDART_LIBRARY_VAR CUDA_cudart_static_LIBRARY)
+else()
+ # If not available, silently disable the option.
+ set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
+ set(CUDA_CUDART_LIBRARY_VAR CUDA_CUDART_LIBRARY)
+endif()
+if(NOT CUDA_VERSION VERSION_LESS "5.0")
+ cuda_find_library_local_first(CUDA_cudadevrt_LIBRARY cudadevrt "\"cudadevrt\" library")
+ mark_as_advanced(CUDA_cudadevrt_LIBRARY)
+endif()
+
+if(CUDA_USE_STATIC_CUDA_RUNTIME)
+ if(UNIX)
+ # Check for the dependent libraries. Here we look for pthreads.
+ if (DEFINED CMAKE_THREAD_PREFER_PTHREAD)
+ set(_cuda_cmake_thread_prefer_pthread ${CMAKE_THREAD_PREFER_PTHREAD})
+ endif()
+ set(CMAKE_THREAD_PREFER_PTHREAD 1)
+
+    # Many of the FindXYZ modules that come with CMake use try_compile with int main(){return 0;}
+ # as the source file. Unfortunately this causes a warning with -Wstrict-prototypes and
+ # -Werror causes the try_compile to fail. We will just temporarily disable other flags
+ # when doing the find_package command here.
+ set(_cuda_cmake_c_flags ${CMAKE_C_FLAGS})
+ set(CMAKE_C_FLAGS "-fPIC")
+ find_package(Threads REQUIRED)
+ set(CMAKE_C_FLAGS ${_cuda_cmake_c_flags})
+
+ if (DEFINED _cuda_cmake_thread_prefer_pthread)
+ set(CMAKE_THREAD_PREFER_PTHREAD ${_cuda_cmake_thread_prefer_pthread})
+ unset(_cuda_cmake_thread_prefer_pthread)
+ else()
+ unset(CMAKE_THREAD_PREFER_PTHREAD)
+ endif()
+
+ if(NOT APPLE)
+ #On Linux, you must link against librt when using the static cuda runtime.
+ find_library(CUDA_rt_LIBRARY rt)
+ if (NOT CUDA_rt_LIBRARY)
+ message(WARNING "Expecting to find librt for libcudart_static, but didn't find it.")
+ endif()
+ endif()
+ endif()
+endif()
+
# CUPTI library showed up in cuda toolkit 4.0
if(NOT CUDA_VERSION VERSION_LESS "4.0")
cuda_find_library_local_first_with_path_ext(CUDA_cupti_LIBRARY cupti "\"cupti\" library" "extras/CUPTI/")
mark_as_advanced(CUDA_cupti_LIBRARY)
endif()
+# Set the CUDA_LIBRARIES variable. This is the set of stuff to link against if you are
+# using the CUDA runtime. For the dynamic version of the runtime, most of the
+# dependencies are brought in, but for the static version there are additional libraries
+# and linker commands needed.
+# Initialize to empty
+set(CUDA_LIBRARIES)
+
# If we are using emulation mode and we found the cudartemu library then use
# that one instead of cudart.
if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY)
- set(CUDA_LIBRARIES ${CUDA_CUDARTEMU_LIBRARY})
-else()
- set(CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY})
-endif()
-if(APPLE)
- # We need to add the path to cudart to the linker using rpath, since the
- # library name for the cuda libraries is prepended with @rpath.
- if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY)
- get_filename_component(_cuda_path_to_cudart "${CUDA_CUDARTEMU_LIBRARY}" PATH)
- else()
- get_filename_component(_cuda_path_to_cudart "${CUDA_CUDART_LIBRARY}" PATH)
+ list(APPEND CUDA_LIBRARIES ${CUDA_CUDARTEMU_LIBRARY})
+elseif(CUDA_USE_STATIC_CUDA_RUNTIME AND CUDA_cudart_static_LIBRARY)
+ list(APPEND CUDA_LIBRARIES ${CUDA_cudart_static_LIBRARY} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
+ if (CUDA_rt_LIBRARY)
+ list(APPEND CUDA_LIBRARIES ${CUDA_rt_LIBRARY})
endif()
- if(_cuda_path_to_cudart)
- list(APPEND CUDA_LIBRARIES -Wl,-rpath "-Wl,${_cuda_path_to_cudart}")
+ if(APPLE)
+ # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+ # the static cuda runtime can find it at runtime.
+ list(APPEND CUDA_LIBRARIES -Wl,-rpath,/usr/local/cuda/lib)
endif()
+else()
+ list(APPEND CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY})
endif()
# 1.1 toolkit on linux doesn't appear to have a separate library on
@@ -710,6 +916,7 @@ if(NOT CUDA_VERSION VERSION_LESS "3.2")
endif()
endif()
if(CUDA_VERSION VERSION_GREATER "5.0")
+ find_cuda_helper_libs(cublas_device)
# In CUDA 5.5 NPP was splitted onto 3 separate libraries.
find_cuda_helper_libs(nppc)
find_cuda_helper_libs(nppi)
@@ -718,13 +925,17 @@ if(CUDA_VERSION VERSION_GREATER "5.0")
elseif(NOT CUDA_VERSION VERSION_LESS "4.0")
find_cuda_helper_libs(npp)
endif()
+if(NOT CUDA_VERSION VERSION_LESS "7.0")
+ # cusolver showed up in version 7.0
+ find_cuda_helper_libs(cusolver)
+endif()
if (CUDA_BUILD_EMULATION)
set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY})
set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY})
else()
set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY})
- set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY})
+ set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY} ${CUDA_cublas_device_LIBRARY})
endif()
########################
@@ -799,16 +1010,19 @@ set(CUDA_FOUND TRUE)
set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL "${CUDA_TOOLKIT_ROOT_DIR}" CACHE INTERNAL
"This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully." FORCE)
+set(CUDA_TOOLKIT_TARGET_DIR_INTERNAL "${CUDA_TOOLKIT_TARGET_DIR}" CACHE INTERNAL
+ "This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was set successfully." FORCE)
set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL
"This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE)
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)
+
find_package_handle_standard_args(CUDA
REQUIRED_VARS
CUDA_TOOLKIT_ROOT_DIR
CUDA_NVCC_EXECUTABLE
CUDA_INCLUDE_DIRS
- CUDA_CUDART_LIBRARY
+ ${CUDA_CUDART_LIBRARY_VAR}
VERSION_VAR
CUDA_VERSION
)
@@ -825,7 +1039,7 @@ find_package_handle_standard_args(CUDA
# Add include directories to pass to the nvcc command.
macro(CUDA_INCLUDE_DIRECTORIES)
foreach(dir ${ARGN})
- list(APPEND CUDA_NVCC_INCLUDE_ARGS_USER -I${dir})
+ list(APPEND CUDA_NVCC_INCLUDE_DIRS_USER ${dir})
endforeach()
endmacro()
@@ -834,6 +1048,7 @@ endmacro()
cuda_find_helper_file(parse_cubin cmake)
cuda_find_helper_file(make2cmake cmake)
cuda_find_helper_file(run_nvcc cmake)
+include("${CMAKE_CURRENT_LIST_DIR}/FindCUDA/select_compute_arch.cmake")
##############################################################################
# Separate the OPTIONS out from the sources
@@ -844,15 +1059,15 @@ macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _cmake_options _options)
set( ${_options} )
set( _found_options FALSE )
foreach(arg ${ARGN})
- if(arg STREQUAL "OPTIONS")
+ if("x${arg}" STREQUAL "xOPTIONS")
set( _found_options TRUE )
elseif(
- arg STREQUAL "WIN32" OR
- arg STREQUAL "MACOSX_BUNDLE" OR
- arg STREQUAL "EXCLUDE_FROM_ALL" OR
- arg STREQUAL "STATIC" OR
- arg STREQUAL "SHARED" OR
- arg STREQUAL "MODULE"
+ "x${arg}" STREQUAL "xWIN32" OR
+ "x${arg}" STREQUAL "xMACOSX_BUNDLE" OR
+ "x${arg}" STREQUAL "xEXCLUDE_FROM_ALL" OR
+ "x${arg}" STREQUAL "xSTATIC" OR
+ "x${arg}" STREQUAL "xSHARED" OR
+ "x${arg}" STREQUAL "xMODULE"
)
list(APPEND ${_cmake_options} ${arg})
else()
@@ -948,7 +1163,7 @@ function(CUDA_COMPUTE_BUILD_PATH path build_path)
endif()
endif()
- # This recipie is from cmLocalGenerator::CreateSafeUniqueObjectFileName in the
+ # This recipe is from cmLocalGenerator::CreateSafeUniqueObjectFileName in the
# CMake source.
# Remove leading /
@@ -977,7 +1192,7 @@ endfunction()
# a .cpp or .ptx file.
# INPUT:
# cuda_target - Target name
-# format - PTX or OBJ
+# format - PTX, CUBIN, FATBIN or OBJ
# FILE1 .. FILEN - The remaining arguments are the sources to be wrapped.
# OPTIONS - Extra options to NVCC
# OUTPUT:
@@ -987,6 +1202,18 @@ endfunction()
macro(CUDA_WRAP_SRCS cuda_target format generated_files)
+ # Put optional arguments in list.
+ set(_argn_list "${ARGN}")
+ # If one of the given optional arguments is "PHONY", make a note of it, then
+ # remove it from the list.
+ list(FIND _argn_list "PHONY" _phony_idx)
+ if("${_phony_idx}" GREATER "-1")
+ set(_target_is_phony true)
+ list(REMOVE_AT _argn_list ${_phony_idx})
+ else()
+ set(_target_is_phony false)
+ endif()
+
# If CMake doesn't support separable compilation, complain
if(CUDA_SEPARABLE_COMPILATION AND CMAKE_VERSION VERSION_LESS "2.8.10.1")
message(SEND_ERROR "CUDA_SEPARABLE_COMPILATION isn't supported for CMake versions less than 2.8.10.1")
@@ -1023,6 +1250,10 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
set(nvcc_flags ${nvcc_flags} -m32)
endif()
+ if(CUDA_TARGET_CPU_ARCH)
+ set(nvcc_flags ${nvcc_flags} "--target-cpu-architecture=${CUDA_TARGET_CPU_ARCH}")
+ endif()
+
# This needs to be passed in at this stage, because VS needs to fill out the
# value of VCInstallDir from within VS. Note that CCBIN is only used if
# -ccbin or --compiler-bindir isn't used and CUDA_HOST_COMPILER matches
@@ -1044,18 +1275,27 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
endif()
# Initialize our list of includes with the user ones followed by the CUDA system ones.
- set(CUDA_NVCC_INCLUDE_ARGS ${CUDA_NVCC_INCLUDE_ARGS_USER} "-I${CUDA_INCLUDE_DIRS}")
- # Get the include directories for this directory and use them for our nvcc command.
- # Remove duplicate entries which may be present since include_directories
- # in CMake >= 2.8.8 does not remove them.
- get_directory_property(CUDA_NVCC_INCLUDE_DIRECTORIES INCLUDE_DIRECTORIES)
- list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRECTORIES)
- if(CUDA_NVCC_INCLUDE_DIRECTORIES)
- foreach(dir ${CUDA_NVCC_INCLUDE_DIRECTORIES})
- list(APPEND CUDA_NVCC_INCLUDE_ARGS -I${dir})
- endforeach()
+ set(CUDA_NVCC_INCLUDE_DIRS ${CUDA_NVCC_INCLUDE_DIRS_USER} "${CUDA_INCLUDE_DIRS}")
+ if(_target_is_phony)
+ # If the passed in target name isn't a real target (i.e., this is from a call to one of the
+ # cuda_compile_* functions), need to query directory properties to get include directories
+ # and compile definitions.
+ get_directory_property(_dir_include_dirs INCLUDE_DIRECTORIES)
+ get_directory_property(_dir_compile_defs COMPILE_DEFINITIONS)
+
+ list(APPEND CUDA_NVCC_INCLUDE_DIRS "${_dir_include_dirs}")
+ set(CUDA_NVCC_COMPILE_DEFINITIONS "${_dir_compile_defs}")
+ else()
+ # Append the include directories for this target via generator expression, which is
+ # expanded by the FILE(GENERATE) call below. This generator expression captures all
+ # include dirs set by the user, whether via directory properties or target properties
+ list(APPEND CUDA_NVCC_INCLUDE_DIRS "$<TARGET_PROPERTY:${cuda_target},INCLUDE_DIRECTORIES>")
+
+ # Do the same thing with compile definitions
+ set(CUDA_NVCC_COMPILE_DEFINITIONS "$<TARGET_PROPERTY:${cuda_target},COMPILE_DEFINITIONS>")
endif()
+
# Reset these variables
set(CUDA_WRAP_OPTION_NVCC_FLAGS)
foreach(config ${CUDA_configuration_types})
@@ -1063,7 +1303,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper})
endforeach()
- CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${ARGN})
+ CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${_argn_list})
CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options})
# Figure out if we are building a shared library. BUILD_SHARED_LIBS is
@@ -1122,21 +1362,26 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
endif()
- set(_cuda_host_flags "${_cuda_host_flags}\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})")
+ string(APPEND _cuda_host_flags "\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})")
endif()
# Note that if we ever want CUDA_NVCC_FLAGS_<CONFIG> to be string (instead of a list
# like it is currently), we can remove the quotes around the
# ${CUDA_NVCC_FLAGS_${config_upper}} variable like the CMAKE_HOST_FLAGS_<CONFIG> variable.
- set(_cuda_nvcc_flags_config "${_cuda_nvcc_flags_config}\nset(CUDA_NVCC_FLAGS_${config_upper} ${CUDA_NVCC_FLAGS_${config_upper}} ;; ${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}})")
+ string(APPEND _cuda_nvcc_flags_config "\nset(CUDA_NVCC_FLAGS_${config_upper} ${CUDA_NVCC_FLAGS_${config_upper}} ;; ${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}})")
endforeach()
- # Get the list of definitions from the directory property
- get_directory_property(CUDA_NVCC_DEFINITIONS COMPILE_DEFINITIONS)
- if(CUDA_NVCC_DEFINITIONS)
- foreach(_definition ${CUDA_NVCC_DEFINITIONS})
- list(APPEND nvcc_flags "-D${_definition}")
- endforeach()
+ # Process the C++11 flag. If the host sets the flag, we need to add it to nvcc and
+  # remove it from the host. This is because -Xcompiler -std=c++11 will choke nvcc (it uses
+ # the C preprocessor). In order to get this to work correctly, we need to use nvcc's
+ # specific c++11 flag.
+ if( "${_cuda_host_flags}" MATCHES "-std=c\\+\\+11")
+ # Add the c++11 flag to nvcc if it isn't already present. Note that we only look at
+ # the main flag instead of the configuration specific flags.
+ if( NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std;c\\+\\+11" )
+ list(APPEND nvcc_flags --std c++11)
+ endif()
+ string(REGEX REPLACE "[-]+std=c\\+\\+11" "" _cuda_host_flags "${_cuda_host_flags}")
endif()
if(_cuda_build_shared_libs)
@@ -1148,27 +1393,39 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
# Iterate over the macro arguments and create custom
# commands for all the .cu files.
- foreach(file ${ARGN})
+ foreach(file ${_argn_list})
# Ignore any file marked as a HEADER_FILE_ONLY
get_source_file_property(_is_header ${file} HEADER_FILE_ONLY)
- if(${file} MATCHES ".*\\.cu$" AND NOT _is_header)
+ # Allow per source file overrides of the format. Also allows compiling non-.cu files.
+ get_source_file_property(_cuda_source_format ${file} CUDA_SOURCE_PROPERTY_FORMAT)
+ if((${file} MATCHES "\\.cu$" OR _cuda_source_format) AND NOT _is_header)
- # Allow per source file overrides of the format.
- get_source_file_property(_cuda_source_format ${file} CUDA_SOURCE_PROPERTY_FORMAT)
if(NOT _cuda_source_format)
set(_cuda_source_format ${format})
endif()
-
- if( ${_cuda_source_format} MATCHES "PTX" )
- set( compile_to_ptx ON )
- elseif( ${_cuda_source_format} MATCHES "OBJ")
- set( compile_to_ptx OFF )
+ # If file isn't a .cu file, we need to tell nvcc to treat it as such.
+ if(NOT ${file} MATCHES "\\.cu$")
+ set(cuda_language_flag -x=cu)
else()
- message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS for file '${file}': '${_cuda_source_format}'. Use OBJ or PTX.")
+ set(cuda_language_flag)
endif()
+ if( ${_cuda_source_format} MATCHES "OBJ")
+ set( cuda_compile_to_external_module OFF )
+ else()
+ set( cuda_compile_to_external_module ON )
+ if( ${_cuda_source_format} MATCHES "PTX" )
+ set( cuda_compile_to_external_module_type "ptx" )
+ elseif( ${_cuda_source_format} MATCHES "CUBIN")
+ set( cuda_compile_to_external_module_type "cubin" )
+ elseif( ${_cuda_source_format} MATCHES "FATBIN")
+ set( cuda_compile_to_external_module_type "fatbin" )
+ else()
+ message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS or set with CUDA_SOURCE_PROPERTY_FORMAT file property for file '${file}': '${_cuda_source_format}'. Use OBJ, PTX, CUBIN or FATBIN.")
+ endif()
+ endif()
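# Usage sketch (hypothetical file names): the CUDA_SOURCE_PROPERTY_FORMAT source property
# drives the branch above, letting a project compile a non-.cu file as CUDA or emit an
# external module format for a single source:
#   set_source_files_properties(kernels.cpp PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
#   set_source_files_properties(raygen.cu   PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT PTX)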
- if(compile_to_ptx)
+ if(cuda_compile_to_external_module)
        # Don't use any of the host compilation flags for PTX/CUBIN/FATBIN targets.
set(CUDA_HOST_FLAGS)
set(CUDA_NVCC_FLAGS_CONFIG)
@@ -1183,7 +1440,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
if(CUDA_GENERATED_OUTPUT_DIR)
set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}")
else()
- if ( compile_to_ptx )
+ if ( cuda_compile_to_external_module )
set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}")
else()
set(cuda_compile_output_dir "${cuda_compile_intermediate_directory}")
@@ -1193,10 +1450,10 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
# Add a custom target to generate a c or ptx file. ######################
get_filename_component( basename ${file} NAME )
- if( compile_to_ptx )
+ if( cuda_compile_to_external_module )
set(generated_file_path "${cuda_compile_output_dir}")
- set(generated_file_basename "${cuda_target}_generated_${basename}.ptx")
- set(format_flag "-ptx")
+ set(generated_file_basename "${cuda_target}_generated_${basename}.${cuda_compile_to_external_module_type}")
+ set(format_flag "-${cuda_compile_to_external_module_type}")
file(MAKE_DIRECTORY "${cuda_compile_output_dir}")
else()
set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}")
@@ -1216,10 +1473,11 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
set(cmake_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.depend")
set(NVCC_generated_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.NVCC-depend")
set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt")
- set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake")
+ set(custom_target_script_pregen "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake.pre-gen")
+ set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}$<$<BOOL:$<CONFIG>>:.$<CONFIG>>.cmake")
# Setup properties for obj files:
- if( NOT compile_to_ptx )
+ if( NOT cuda_compile_to_external_module )
set_source_files_properties("${generated_file}"
PROPERTIES
EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked.
@@ -1234,7 +1492,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}")
endif()
- if( NOT compile_to_ptx AND CUDA_SEPARABLE_COMPILATION)
+ if( NOT cuda_compile_to_external_module AND CUDA_SEPARABLE_COMPILATION)
list(APPEND ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS "${generated_file}")
endif()
@@ -1251,13 +1509,17 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
# Build the NVCC made dependency file ###################################
set(build_cubin OFF)
if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN )
- if ( NOT compile_to_ptx )
+ if ( NOT cuda_compile_to_external_module )
set ( build_cubin ON )
endif()
endif()
# Configure the build script
- configure_file("${CUDA_run_nvcc}" "${custom_target_script}" @ONLY)
+ configure_file("${CUDA_run_nvcc}" "${custom_target_script_pregen}" @ONLY)
+ file(GENERATE
+ OUTPUT "${custom_target_script}"
+ INPUT "${custom_target_script_pregen}"
+ )
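# Sketch of what this produces (illustrative names): configure_file() expands the @VAR@
# references into the .pre-gen script, and file(GENERATE) then resolves the $<CONFIG>
# generator expression in custom_target_script, so a multi-config generator gets one
# script per configuration:
#   <intermediate dir>/<target>_generated_<file>.cmake.pre-gen
#   <intermediate dir>/<target>_generated_<file>.Debug.cmake
#   <intermediate dir>/<target>_generated_<file>.Release.cmake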
# So if a user specifies the same cuda file as input more than once, you
# can have bad things happen with dependencies. Here we check an option
@@ -1278,12 +1540,17 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
      # Create the comment string
file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}")
- if(compile_to_ptx)
- set(cuda_build_comment_string "Building NVCC ptx file ${generated_file_relative_path}")
+ if(cuda_compile_to_external_module)
+ set(cuda_build_comment_string "Building NVCC ${cuda_compile_to_external_module_type} file ${generated_file_relative_path}")
else()
set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}")
endif()
+      # VERBATIM escaping would prevent Visual Studio from expanding $(VCInstallDir),
+      # so drop VERBATIM when that macro appears in the compiler path.
+      set(_verbatim VERBATIM)
+      if(ccbin_flags MATCHES "\\$\\(VCInstallDir\\)")
+        set(_verbatim "")
+      endif()
+
# Build the generated file and dependency file ##########################
add_custom_command(
OUTPUT ${generated_file}
@@ -1302,6 +1569,7 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files)
-P "${custom_target_script}"
WORKING_DIRECTORY "${cuda_compile_intermediate_directory}"
COMMENT "${cuda_build_comment_string}"
+ ${_verbatim}
)
# Make sure the build system knows the file is generated.
@@ -1323,10 +1591,10 @@ endmacro()
function(_cuda_get_important_host_flags important_flags flag_string)
if(CMAKE_GENERATOR MATCHES "Visual Studio")
- string(REGEX MATCHALL "/M[DT][d]?" flags ${flag_string})
+ string(REGEX MATCHALL "/M[DT][d]?" flags "${flag_string}")
list(APPEND ${important_flags} ${flags})
else()
- string(REGEX MATCHALL "-fPIC" flags ${flag_string})
+ string(REGEX MATCHALL "-fPIC" flags "${flag_string}")
list(APPEND ${important_flags} ${flags})
endif()
set(${important_flags} ${${important_flags}} PARENT_SCOPE)
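# Example of what this extracts (illustrative flag strings):
#   _cuda_get_important_host_flags(flags "/MDd /Zi /Ob0")    # Visual Studio: flags -> /MDd
#   _cuda_get_important_host_flags(flags "-O2 -fPIC -Wall")  # other generators: flags -> -fPIC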
@@ -1372,18 +1640,40 @@ function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options
# If -ccbin, --compiler-bindir has been specified, don't do anything. Otherwise add it here.
list( FIND nvcc_flags "-ccbin" ccbin_found0 )
list( FIND nvcc_flags "--compiler-bindir" ccbin_found1 )
- if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 )
- list(APPEND nvcc_flags -ccbin "\"${CUDA_HOST_COMPILER}\"")
+ if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
+ # Match VERBATIM check below.
+ if(CUDA_HOST_COMPILER MATCHES "\\$\\(VCInstallDir\\)")
+ list(APPEND nvcc_flags -ccbin "\"${CUDA_HOST_COMPILER}\"")
+ else()
+ list(APPEND nvcc_flags -ccbin "${CUDA_HOST_COMPILER}")
+ endif()
endif()
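# Usage sketch (illustrative path): when no -ccbin/--compiler-bindir flag was given, the
# host compiler configured for the project is forwarded to this device link step:
#   set(CUDA_HOST_COMPILER /usr/bin/gcc-5)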
+
+ # Create a list of flags specified by CUDA_NVCC_FLAGS_${CONFIG} and CMAKE_${CUDA_C_OR_CXX}_FLAGS*
+ set(config_specific_flags)
set(flags)
foreach(config ${CUDA_configuration_types})
string(TOUPPER ${config} config_upper)
+ # Add config specific flags
+ foreach(f ${CUDA_NVCC_FLAGS_${config_upper}})
+ list(APPEND config_specific_flags $<$<CONFIG:${config}>:${f}>)
+ endforeach()
set(important_host_flags)
- _cuda_get_important_host_flags(important_host_flags ${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}})
+ _cuda_get_important_host_flags(important_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
foreach(f ${important_host_flags})
list(APPEND flags $<$<CONFIG:${config}>:-Xcompiler> $<$<CONFIG:${config}>:${f}>)
endforeach()
endforeach()
+ # Add CMAKE_${CUDA_C_OR_CXX}_FLAGS
+ set(important_host_flags)
+ _cuda_get_important_host_flags(important_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS}")
+ foreach(f ${important_host_flags})
+ list(APPEND flags -Xcompiler ${f})
+ endforeach()
+
+  # Add our general CUDA_NVCC_FLAGS with the configuration specific flags
+ set(nvcc_flags ${CUDA_NVCC_FLAGS} ${config_specific_flags} ${nvcc_flags})
+
file(RELATIVE_PATH output_file_relative_path "${CMAKE_BINARY_DIR}" "${output_file}")
# Some generators don't handle the multiple levels of custom command
@@ -1391,12 +1681,16 @@ function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options
# we work around that issue by compiling the intermediate link object as a
# pre-link custom command in that situation.
set(do_obj_build_rule TRUE)
- if (MSVC_VERSION GREATER 1599)
- # VS 2010 and 2012 have this problem. If future versions fix this issue,
- # it should still work, it just won't be as nice as the other method.
+ if (MSVC_VERSION GREATER 1599 AND MSVC_VERSION LESS 1800)
+ # VS 2010 and 2012 have this problem.
set(do_obj_build_rule FALSE)
endif()
+ set(_verbatim VERBATIM)
+ if(nvcc_flags MATCHES "\\$\\(VCInstallDir\\)")
+ set(_verbatim "")
+ endif()
+
if (do_obj_build_rule)
add_custom_command(
OUTPUT ${output_file}
@@ -1404,13 +1698,17 @@ function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options
COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} -dlink ${object_files} -o ${output_file}
${flags}
COMMENT "Building NVCC intermediate link file ${output_file_relative_path}"
+ ${_verbatim}
)
else()
+ get_filename_component(output_file_dir "${output_file}" DIRECTORY)
add_custom_command(
TARGET ${cuda_target}
PRE_LINK
COMMAND ${CMAKE_COMMAND} -E echo "Building NVCC intermediate link file ${output_file_relative_path}"
+ COMMAND ${CMAKE_COMMAND} -E make_directory "${output_file_dir}"
COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} ${flags} -dlink ${object_files} -o "${output_file}"
+ ${_verbatim}
)
endif()
endif()
@@ -1449,10 +1747,16 @@ macro(CUDA_ADD_LIBRARY cuda_target)
# variable will have been defined.
CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")
- target_link_libraries(${cuda_target}
+ target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD}
${CUDA_LIBRARIES}
)
+ if(CUDA_SEPARABLE_COMPILATION)
+ target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD}
+ ${CUDA_cudadevrt_LIBRARY}
+ )
+ endif()
+
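# Usage sketch (hypothetical target and sources): with separable compilation enabled,
# CUDA_ADD_LIBRARY now links the device runtime library automatically:
#   set(CUDA_SEPARABLE_COMPILATION ON)
#   cuda_add_library(particles particles.cu integrator.cu)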
# We need to set the linker language based on what the expected generated file
# would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
set_target_properties(${cuda_target}
@@ -1493,7 +1797,7 @@ macro(CUDA_ADD_EXECUTABLE cuda_target)
# variable will have been defined.
CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS("${link_file}" ${cuda_target} "${_options}" "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")
- target_link_libraries(${cuda_target}
+ target_link_libraries(${cuda_target} ${CUDA_LINK_LIBRARIES_KEYWORD}
${CUDA_LIBRARIES}
)
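# Usage sketch: CUDA_LINK_LIBRARIES_KEYWORD (assumed empty unless the project sets it) is
# spliced into these target_link_libraries() calls, so a project can opt into keyword
# signatures:
#   set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
#   cuda_add_executable(app main.cu)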
@@ -1509,21 +1813,40 @@ endmacro()
###############################################################################
###############################################################################
-# CUDA COMPILE
+# (Internal) helper for manually added cuda source files with specific targets
###############################################################################
###############################################################################
-macro(CUDA_COMPILE generated_files)
+macro(cuda_compile_base cuda_target format generated_files)
+ # Update a counter in this directory, to keep phony target names unique.
+ set(_cuda_target "${cuda_target}")
+ get_property(_counter DIRECTORY PROPERTY _cuda_internal_phony_counter)
+ if(_counter)
+ math(EXPR _counter "${_counter} + 1")
+ else()
+ set(_counter 1)
+ endif()
+ string(APPEND _cuda_target "_${_counter}")
+ set_property(DIRECTORY PROPERTY _cuda_internal_phony_counter ${_counter})
# Separate the sources from the options
CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})
+
# Create custom commands and targets for each file.
- CUDA_WRAP_SRCS( cuda_compile OBJ _generated_files ${_sources} ${_cmake_options}
- OPTIONS ${_options} )
+ CUDA_WRAP_SRCS( ${_cuda_target} ${format} _generated_files ${_sources}
+ ${_cmake_options} OPTIONS ${_options} PHONY)
set( ${generated_files} ${_generated_files})
endmacro()
+###############################################################################
+###############################################################################
+# CUDA COMPILE
+###############################################################################
+###############################################################################
+macro(CUDA_COMPILE generated_files)
+ cuda_compile_base(cuda_compile OBJ ${generated_files} ${ARGN})
+endmacro()
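# Usage sketch (hypothetical sources): cuda_compile() returns generated object files that
# can be added to an ordinary target:
#   cuda_compile(KERNEL_OBJS kernel1.cu kernel2.cu OPTIONS -G)
#   add_library(host_and_device host.cpp ${KERNEL_OBJS})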
###############################################################################
###############################################################################
@@ -1531,17 +1854,28 @@ endmacro()
###############################################################################
###############################################################################
macro(CUDA_COMPILE_PTX generated_files)
+ cuda_compile_base(cuda_compile_ptx PTX ${generated_files} ${ARGN})
+endmacro()
- # Separate the sources from the options
- CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})
- # Create custom commands and targets for each file.
- CUDA_WRAP_SRCS( cuda_compile_ptx PTX _generated_files ${_sources} ${_cmake_options}
- OPTIONS ${_options} )
-
- set( ${generated_files} ${_generated_files})
+###############################################################################
+###############################################################################
+# CUDA COMPILE FATBIN
+###############################################################################
+###############################################################################
+macro(CUDA_COMPILE_FATBIN generated_files)
+ cuda_compile_base(cuda_compile_fatbin FATBIN ${generated_files} ${ARGN})
+endmacro()
+###############################################################################
+###############################################################################
+# CUDA COMPILE CUBIN
+###############################################################################
+###############################################################################
+macro(CUDA_COMPILE_CUBIN generated_files)
+ cuda_compile_base(cuda_compile_cubin CUBIN ${generated_files} ${ARGN})
endmacro()
+
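# Usage sketch (hypothetical source): the FATBIN and CUBIN variants mirror
# cuda_compile_ptx() but emit .fatbin/.cubin files instead of .ptx:
#   cuda_compile_ptx(PTX_FILES kernels.cu)
#   cuda_compile_fatbin(FATBIN_FILES kernels.cu)
#   cuda_compile_cubin(CUBIN_FILES kernels.cu)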
###############################################################################
###############################################################################
# CUDA ADD CUFFT TO TARGET
@@ -1549,9 +1883,9 @@ endmacro()
###############################################################################
macro(CUDA_ADD_CUFFT_TO_TARGET target)
if (CUDA_BUILD_EMULATION)
- target_link_libraries(${target} ${CUDA_cufftemu_LIBRARY})
+ target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cufftemu_LIBRARY})
else()
- target_link_libraries(${target} ${CUDA_cufft_LIBRARY})
+ target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cufft_LIBRARY})
endif()
endmacro()
@@ -1562,9 +1896,9 @@ endmacro()
###############################################################################
macro(CUDA_ADD_CUBLAS_TO_TARGET target)
if (CUDA_BUILD_EMULATION)
- target_link_libraries(${target} ${CUDA_cublasemu_LIBRARY})
+ target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cublasemu_LIBRARY})
else()
- target_link_libraries(${target} ${CUDA_cublas_LIBRARY})
+ target_link_libraries(${target} ${CUDA_LINK_LIBRARIES_KEYWORD} ${CUDA_cublas_LIBRARY} ${CUDA_cublas_device_LIBRARY})
endif()
endmacro()
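# Usage sketch (hypothetical target): these helpers append the cuFFT/cuBLAS libraries to
# an existing target, honoring CUDA_LINK_LIBRARIES_KEYWORD and CUDA_BUILD_EMULATION:
#   cuda_add_executable(blas_app app.cu)
#   cuda_add_cufft_to_target(blas_app)
#   cuda_add_cublas_to_target(blas_app)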