diff options
author | MyungJoo Ham <myungjoo.ham@samsung.com> | 2017-10-11 15:16:57 +0900 |
---|---|---|
committer | MyungJoo Ham <myungjoo.ham@samsung.com> | 2017-10-11 15:16:57 +0900 |
commit | 915c76ded744c0f5f151402b9fa69f3fd8452573 (patch) | |
tree | ca6a387466543248890f346847acaa8343989b22 /Modules/FindCUDA | |
parent | 317dbdb79761ef65e45c7358cfc7571c6afa54ad (diff) | |
download | cmake-915c76ded744c0f5f151402b9fa69f3fd8452573.tar.gz cmake-915c76ded744c0f5f151402b9fa69f3fd8452573.tar.bz2 cmake-915c76ded744c0f5f151402b9fa69f3fd8452573.zip |
Imported Upstream version 3.9.4upstream/3.9.4
Diffstat (limited to 'Modules/FindCUDA')
-rw-r--r-- | Modules/FindCUDA/make2cmake.cmake | 25 | ||||
-rw-r--r-- | Modules/FindCUDA/parse_cubin.cmake | 15 | ||||
-rw-r--r-- | Modules/FindCUDA/run_nvcc.cmake | 28 | ||||
-rw-r--r-- | Modules/FindCUDA/select_compute_arch.cmake | 198 |
4 files changed, 247 insertions, 19 deletions
diff --git a/Modules/FindCUDA/make2cmake.cmake b/Modules/FindCUDA/make2cmake.cmake index 1b53d177d..7b5389ec5 100644 --- a/Modules/FindCUDA/make2cmake.cmake +++ b/Modules/FindCUDA/make2cmake.cmake @@ -35,14 +35,23 @@ # This converts a file written in makefile syntax into one that can be included # by CMake. +# Input variables +# +# verbose:BOOL=<> OFF: Be as quiet as possible (default) +# ON : Extra output +# +# input_file:FILEPATH=<> Path to dependecy file in makefile format +# +# output_file:FILEPATH=<> Path to file with dependencies in CMake readable variable +# + file(READ ${input_file} depend_text) -if (${depend_text} MATCHES ".+") +if (NOT "${depend_text}" STREQUAL "") # message("FOUND DEPENDS") - # Remember, four backslashes is escaped to one backslash in the string. - string(REGEX REPLACE "\\\\ " " " depend_text ${depend_text}) + string(REPLACE "\\ " " " depend_text ${depend_text}) # This works for the nvcc -M generated dependency files. string(REGEX REPLACE "^.* : " "" depend_text ${depend_text}) @@ -63,12 +72,16 @@ if (${depend_text} MATCHES ".+") if (EXISTS "/${file}") set(file "/${file}") else() - message(WARNING " Removing non-existent dependency file: ${file}") + if(verbose) + message(WARNING " Removing non-existent dependency file: ${file}") + endif() set(file "") endif() endif() - if(NOT IS_DIRECTORY "${file}") + # Make sure we check to see if we have a file, before asking if it is not a directory. + # if(NOT IS_DIRECTORY "") will return TRUE. + if(file AND NOT IS_DIRECTORY "${file}") # If softlinks start to matter, we should change this to REALPATH. For now we need # to flatten paths, because nvcc can generate stuff like /bin/../include instead of # just /include. @@ -87,7 +100,7 @@ list(REMOVE_DUPLICATES dependency_list) list(SORT dependency_list) foreach(file ${dependency_list}) - set(cuda_nvcc_depend "${cuda_nvcc_depend} \"${file}\"\n") + string(APPEND cuda_nvcc_depend " \"${file}\"\n") endforeach() file(WRITE ${output_file} "# Generated by: make2cmake.cmake\nSET(CUDA_NVCC_DEPEND\n ${cuda_nvcc_depend})\n\n") diff --git a/Modules/FindCUDA/parse_cubin.cmake b/Modules/FindCUDA/parse_cubin.cmake index 94be7e2cb..626c8a2e4 100644 --- a/Modules/FindCUDA/parse_cubin.cmake +++ b/Modules/FindCUDA/parse_cubin.cmake @@ -37,11 +37,10 @@ file(READ ${input_file} file_text) -if (${file_text} MATCHES ".+") +if (NOT "${file_text}" STREQUAL "") - # Remember, four backslashes is escaped to one backslash in the string. - string(REGEX REPLACE ";" "\\\\;" file_text ${file_text}) - string(REGEX REPLACE "\ncode" ";code" file_text ${file_text}) + string(REPLACE ";" "\\;" file_text ${file_text}) + string(REPLACE "\ncode" ";code" file_text ${file_text}) list(LENGTH file_text len) @@ -57,7 +56,7 @@ if (${file_text} MATCHES ".+") # Extract kernel names. if (${entry} MATCHES "[^g]name = ([^ ]+)") - string(REGEX REPLACE ".* = ([^ ]+)" "\\1" entry ${entry}) + set(entry "${CMAKE_MATCH_1}") # Check to see if the kernel name starts with "_" set(skip FALSE) @@ -76,19 +75,19 @@ if (${file_text} MATCHES ".+") # Registers if (${entry} MATCHES "reg([ ]+)=([ ]+)([^ ]+)") - string(REGEX REPLACE ".*([ ]+)=([ ]+)([^ ]+)" "\\3" entry ${entry}) + set(entry "${CMAKE_MATCH_3}") message("Registers: ${entry}") endif() # Local memory if (${entry} MATCHES "lmem([ ]+)=([ ]+)([^ ]+)") - string(REGEX REPLACE ".*([ ]+)=([ ]+)([^ ]+)" "\\3" entry ${entry}) + set(entry "${CMAKE_MATCH_3}") message("Local: ${entry}") endif() # Shared memory if (${entry} MATCHES "smem([ ]+)=([ ]+)([^ ]+)") - string(REGEX REPLACE ".*([ ]+)=([ ]+)([^ ]+)" "\\3" entry ${entry}) + set(entry "${CMAKE_MATCH_3}") message("Shared: ${entry}") endif() diff --git a/Modules/FindCUDA/run_nvcc.cmake b/Modules/FindCUDA/run_nvcc.cmake index f0aac8487..28cc1e99e 100644 --- a/Modules/FindCUDA/run_nvcc.cmake +++ b/Modules/FindCUDA/run_nvcc.cmake @@ -62,7 +62,7 @@ set(cmake_dependency_file "@cmake_dependency_file@") # path set(CUDA_make2cmake "@CUDA_make2cmake@") # path set(CUDA_parse_cubin "@CUDA_parse_cubin@") # path set(build_cubin @build_cubin@) # bool -set(CUDA_HOST_COMPILER "@CUDA_HOST_COMPILER@") # bool +set(CUDA_HOST_COMPILER "@CUDA_HOST_COMPILER@") # path # We won't actually use these variables for now, but we need to set this, in # order to force this file to be run again if it changes. set(generated_file_path "@generated_file_path@") # path @@ -73,8 +73,24 @@ set(CUDA_NVCC_EXECUTABLE "@CUDA_NVCC_EXECUTABLE@") # path set(CUDA_NVCC_FLAGS @CUDA_NVCC_FLAGS@ ;; @CUDA_WRAP_OPTION_NVCC_FLAGS@) # list @CUDA_NVCC_FLAGS_CONFIG@ set(nvcc_flags @nvcc_flags@) # list -set(CUDA_NVCC_INCLUDE_ARGS "@CUDA_NVCC_INCLUDE_ARGS@") # list (needs to be in quotes to handle spaces properly). +set(CUDA_NVCC_INCLUDE_DIRS "@CUDA_NVCC_INCLUDE_DIRS@") # list (needs to be in quotes to handle spaces properly). +set(CUDA_NVCC_COMPILE_DEFINITIONS "@CUDA_NVCC_COMPILE_DEFINITIONS@") # list (needs to be in quotes to handle spaces properly). set(format_flag "@format_flag@") # string +set(cuda_language_flag @cuda_language_flag@) # list + +# Clean up list of include directories and add -I flags +list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) +set(CUDA_NVCC_INCLUDE_ARGS) +foreach(dir ${CUDA_NVCC_INCLUDE_DIRS}) + # Extra quotes are added around each flag to help nvcc parse out flags with spaces. + list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") +endforeach() + +# Clean up list of compile definitions, add -D flags, and append to nvcc_flags +list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS) +foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS}) + list(APPEND nvcc_flags "-D${def}") +endforeach() if(build_cubin AND NOT generated_cubin_file) message(FATAL_ERROR "You must specify generated_cubin_file on the command line") @@ -94,7 +110,7 @@ string(TOUPPER "${build_configuration}" build_configuration) #message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) # Extra quotes are added around each flag to help nvcc parse out flags with spaces. - set(nvcc_host_compiler_flags "${nvcc_host_compiler_flags},\"${flag}\"") + string(APPEND nvcc_host_compiler_flags ",\"${flag}\"") endforeach() if (nvcc_host_compiler_flags) set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) @@ -106,7 +122,7 @@ list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) # Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) -if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 ) +if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) if (CUDA_HOST_COMPILER STREQUAL "$(VCInstallDir)bin" AND DEFINED CCBIN) set(CCBIN -ccbin "${CCBIN}") else() @@ -126,7 +142,7 @@ endif() # and other return variables are present after executing the process. macro(cuda_execute_process status command) set(_command ${command}) - if(NOT _command STREQUAL "COMMAND") + if(NOT "x${_command}" STREQUAL "xCOMMAND") message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") endif() if(verbose) @@ -206,6 +222,7 @@ cuda_execute_process( COMMAND "${CMAKE_COMMAND}" -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" + -D "verbose=${verbose}" -P "${CUDA_make2cmake}" ) @@ -238,6 +255,7 @@ cuda_execute_process( "Generating ${generated_file}" COMMAND "${CUDA_NVCC_EXECUTABLE}" "${source_file}" + ${cuda_language_flag} ${format_flag} -o "${generated_file}" ${CCBIN} ${nvcc_flags} diff --git a/Modules/FindCUDA/select_compute_arch.cmake b/Modules/FindCUDA/select_compute_arch.cmake new file mode 100644 index 000000000..8fb44d80a --- /dev/null +++ b/Modules/FindCUDA/select_compute_arch.cmake @@ -0,0 +1,198 @@ +# Synopsis: +# CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [target_CUDA_architectures]) +# -- Selects GPU arch flags for nvcc based on target_CUDA_architectures +# target_CUDA_architectures : Auto | Common | All | LIST(ARCH_AND_PTX ...) +# - "Auto" detects local machine GPU compute arch at runtime. +# - "Common" and "All" cover common and entire subsets of architectures +# ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX +# NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal +# NUM: Any number. Only those pairs are currently accepted by NVCC though: +# 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2 +# Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable} +# Additionally, sets ${out_variable}_readable to the resulting numeric list +# Example: +# CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.0 3.5+PTX 5.2(5.0) Maxwell) +# LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS}) +# +# More info on CUDA architectures: https://en.wikipedia.org/wiki/CUDA +# + +# This list will be used for CUDA_ARCH_NAME = All option +set(CUDA_KNOWN_GPU_ARCHITECTURES "Fermi" "Kepler" "Maxwell") + +# This list will be used for CUDA_ARCH_NAME = Common option (enabled by default) +set(CUDA_COMMON_GPU_ARCHITECTURES "3.0" "3.5" "5.0") + +if (CUDA_VERSION VERSION_GREATER "6.5") + list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2") +endif () + +if (CUDA_VERSION VERSION_GREATER "7.5") + list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal") + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1" "6.1+PTX") +else() + list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2+PTX") +endif () + + + +################################################################################################ +# A function for automatic detection of GPUs installed (if autodetection is enabled) +# Usage: +# CUDA_DETECT_INSTALLED_GPUS(OUT_VARIABLE) +# +function(CUDA_DETECT_INSTALLED_GPUS OUT_VARIABLE) + if(NOT CUDA_GPU_DETECT_OUTPUT) + set(file ${PROJECT_BINARY_DIR}/detect_cuda_compute_capabilities.cpp) + + file(WRITE ${file} "" + "#include <cuda_runtime.h>\n" + "#include <cstdio>\n" + "int main()\n" + "{\n" + " int count = 0;\n" + " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" + " if (count == 0) return -1;\n" + " for (int device = 0; device < count; ++device)\n" + " {\n" + " cudaDeviceProp prop;\n" + " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" + " std::printf(\"%d.%d \", prop.major, prop.minor);\n" + " }\n" + " return 0;\n" + "}\n") + + try_run(run_result compile_result ${PROJECT_BINARY_DIR} ${file} + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}" + LINK_LIBRARIES ${CUDA_LIBRARIES} + RUN_OUTPUT_VARIABLE compute_capabilities) + + if(run_result EQUAL 0) + string(REPLACE "2.1" "2.1(2.0)" compute_capabilities "${compute_capabilities}") + set(CUDA_GPU_DETECT_OUTPUT ${compute_capabilities} + CACHE INTERNAL "Returned GPU architectures from detect_gpus tool" FORCE) + endif() + endif() + + if(NOT CUDA_GPU_DETECT_OUTPUT) + message(STATUS "Automatic GPU detection failed. Building for common architectures.") + set(${OUT_VARIABLE} ${CUDA_COMMON_GPU_ARCHITECTURES} PARENT_SCOPE) + else() + set(${OUT_VARIABLE} ${CUDA_GPU_DETECT_OUTPUT} PARENT_SCOPE) + endif() +endfunction() + + +################################################################################################ +# Function for selecting GPU arch flags for nvcc based on CUDA architectures from parameter list +# Usage: +# SELECT_NVCC_ARCH_FLAGS(out_variable [list of CUDA compute archs]) +function(CUDA_SELECT_NVCC_ARCH_FLAGS out_variable) + set(CUDA_ARCH_LIST "${ARGN}") + + if("X${CUDA_ARCH_LIST}" STREQUAL "X" ) + set(CUDA_ARCH_LIST "Auto") + endif() + + set(cuda_arch_bin) + set(cuda_arch_ptx) + + if("${CUDA_ARCH_LIST}" STREQUAL "All") + set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES}) + elseif("${CUDA_ARCH_LIST}" STREQUAL "Common") + set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES}) + elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto") + CUDA_DETECT_INSTALLED_GPUS(CUDA_ARCH_LIST) + message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}") + endif() + + # Now process the list and look for names + string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}") + list(REMOVE_DUPLICATES CUDA_ARCH_LIST) + foreach(arch_name ${CUDA_ARCH_LIST}) + set(arch_bin) + set(arch_ptx) + set(add_ptx FALSE) + # Check to see if we are compiling PTX + if(arch_name MATCHES "(.*)\\+PTX$") + set(add_ptx TRUE) + set(arch_name ${CMAKE_MATCH_1}) + endif() + if(arch_name MATCHES "^([0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$") + set(arch_bin ${CMAKE_MATCH_1}) + set(arch_ptx ${arch_bin}) + else() + # Look for it in our list of known architectures + if(${arch_name} STREQUAL "Fermi") + set(arch_bin 2.0 "2.1(2.0)") + elseif(${arch_name} STREQUAL "Kepler+Tegra") + set(arch_bin 3.2) + elseif(${arch_name} STREQUAL "Kepler+Tesla") + set(arch_bin 3.7) + elseif(${arch_name} STREQUAL "Kepler") + set(arch_bin 3.0 3.5) + set(arch_ptx 3.5) + elseif(${arch_name} STREQUAL "Maxwell+Tegra") + set(arch_bin 5.3) + elseif(${arch_name} STREQUAL "Maxwell") + set(arch_bin 5.0 5.2) + set(arch_ptx 5.2) + elseif(${arch_name} STREQUAL "Pascal") + set(arch_bin 6.0 6.1) + set(arch_ptx 6.1) + else() + message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS") + endif() + endif() + if(NOT arch_bin) + message(SEND_ERROR "arch_bin wasn't set for some reason") + endif() + list(APPEND cuda_arch_bin ${arch_bin}) + if(add_ptx) + if (NOT arch_ptx) + set(arch_ptx ${arch_bin}) + endif() + list(APPEND cuda_arch_ptx ${arch_ptx}) + endif() + endforeach() + + # remove dots and convert to lists + string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}") + string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") + + if(cuda_arch_bin) + list(REMOVE_DUPLICATES cuda_arch_bin) + endif() + if(cuda_arch_ptx) + list(REMOVE_DUPLICATES cuda_arch_ptx) + endif() + + set(nvcc_flags "") + set(nvcc_archs_readable "") + + # Tell NVCC to add binaries for the specified GPUs + foreach(arch ${cuda_arch_bin}) + if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)") + # User explicitly specified ARCH for the concrete CODE + list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) + list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1}) + else() + # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE + list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch}) + list(APPEND nvcc_archs_readable sm_${arch}) + endif() + endforeach() + + # Tell NVCC to add PTX intermediate code for the specified architectures + foreach(arch ${cuda_arch_ptx}) + list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch}) + list(APPEND nvcc_archs_readable compute_${arch}) + endforeach() + + string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}") + set(${out_variable} ${nvcc_flags} PARENT_SCOPE) + set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE) +endfunction() |