Imported Upstream version 3.9.4upstream/3.9.4

author: MyungJoo Ham <myungjoo.ham@samsung.com> 2017-10-11 15:16:57 +0900
committer: MyungJoo Ham <myungjoo.ham@samsung.com> 2017-10-11 15:16:57 +0900
commit: 915c76ded744c0f5f151402b9fa69f3fd8452573 (patch)
tree: ca6a387466543248890f346847acaa8343989b22 /Tests/CudaOnly
parent: 317dbdb79761ef65e45c7358cfc7571c6afa54ad (diff)
download: cmake-915c76ded744c0f5f151402b9fa69f3fd8452573.tar.gz
cmake-915c76ded744c0f5f151402b9fa69f3fd8452573.tar.bz2
cmake-915c76ded744c0f5f151402b9fa69f3fd8452573.zip
30 files changed, 771 insertions, 0 deletions
diff --git a/Tests/CudaOnly/.clang-format b/Tests/CudaOnly/.clang-format
new file mode 100644
index 000000000..a77589a2d
--- /dev/null
+++ b/Tests/CudaOnly/.clang-format
@@ -0,0 +1,9 @@
+---
+# This configuration requires clang-format 3.8 or higher.
+BasedOnStyle: Mozilla
+AlignOperands: false
+AlwaysBreakAfterReturnType: None
+AlwaysBreakAfterDefinitionReturnType: None
+ColumnLimit: 79
+Standard: Cpp11
+...
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
new file mode 100644
index 000000000..5f456fcf4
--- /dev/null
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -0,0 +1,6 @@
+
+ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
+ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
+ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
+ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
+ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
diff --git a/Tests/CudaOnly/EnableStandard/CMakeLists.txt b/Tests/CudaOnly/EnableStandard/CMakeLists.txt
new file mode 100644
index 000000000..35a1deb69
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/CMakeLists.txt
@@ -0,0 +1,26 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyEnableStandard CUDA)
+
+#Goal for this example:
+#build cuda sources that require C++11 to be enabled.
+
+add_library(CUDAStatic11 STATIC static.cu)
+add_library(CUDADynamic11 SHARED shared.cu)
+
+add_executable(CudaOnlyEnableStandard main.cu)
+target_link_libraries(CudaOnlyEnableStandard PRIVATE CUDAStatic11 CUDADynamic11)
+
+set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD 11)
+set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD_REQUIRED TRUE)
+
+#Verify CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES
+foreach(dir ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+  if(NOT IS_DIRECTORY "${dir}")
+    message(FATAL_ERROR
+      "CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES entry\n"
+      " ${dir}\n"
+      "is not an existing directory."
+      )
+  endif()
+endforeach()
diff --git a/Tests/CudaOnly/EnableStandard/main.cu b/Tests/CudaOnly/EnableStandard/main.cu
new file mode 100644
index 000000000..f21958371
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/main.cu
@@ -0,0 +1,23 @@
+
+#include <iostream>
+
+#ifdef _WIN32
+#define IMPORT __declspec(dllimport)
+#else
+#define IMPORT
+#endif
+
+int static_cuda11_func(int);
+IMPORT int shared_cuda11_func(int);
+
+void test_functions()
+{
+  static_cuda11_func(int(42));
+  shared_cuda11_func(int(42));
+}
+
+int main(int argc, char** argv)
+{
+  test_functions();
+  return 0;
+}
diff --git a/Tests/CudaOnly/EnableStandard/shared.cu b/Tests/CudaOnly/EnableStandard/shared.cu
new file mode 100644
index 000000000..ccdd0b28e
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/shared.cu
@@ -0,0 +1,15 @@
+
+#include <type_traits>
+
+#ifdef _WIN32
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+using tt = std::true_type;
+using ft = std::false_type;
+EXPORT int __host__ shared_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}
diff --git a/Tests/CudaOnly/EnableStandard/static.cu b/Tests/CudaOnly/EnableStandard/static.cu
new file mode 100644
index 000000000..73e43a865
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/static.cu
@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ static_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}
diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
new file mode 100644
index 000000000..10249c614
--- /dev/null
+++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
@@ -0,0 +1,82 @@
+cmake_minimum_required(VERSION 3.8)
+project (CudaOnlyExportPTX CUDA)
+
+#Goal for this example:
+# How to generate PTX files instead of OBJECT files
+# How to reference PTX files for custom commands
+# How to install PTX files
+
+add_library(CudaPTX OBJECT kernelA.cu kernelB.cu)
+set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON)
+
+#Test ObjectFiles with file(GENERATE)
+file(GENERATE
+     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/gen_$<LOWER_CASE:$<CONFIG>/>path_to_objs.h
+     CONTENT [[
+
+#include <vector>
+#include <string>
+
+#ifndef path_to_objs
+#define path_to_objs
+
+static std::string ptx_paths = "$<TARGET_OBJECTS:CudaPTX>";
+
+#endif
+
+]]
+)
+#We are going to need a wrapper around bin2c for multiple reasons
+# 1. bin2c only converts a single file at a time
+# 2. bin2c has only standard out support, so we have to manually
+# redirect to a cmake buffer
+# 3. We want to pack everything into a single output file, so we
+# need to also pass the --name option
+set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h)
+
+get_filename_component(cuda_compiler_bin "${CMAKE_CUDA_COMPILER}" DIRECTORY)
+find_program(bin_to_c
+  NAMES bin2c
+  PATHS ${cuda_compiler_bin}
+  )
+if(NOT bin_to_c)
+  message(FATAL_ERROR
+    "bin2c not found:\n"
+    "  CMAKE_CUDA_COMPILER='${CMAKE_CUDA_COMPILER}'\n"
+    "  cuda_compiler_bin='${cuda_compiler_bin}'\n"
+    )
+endif()
+
+add_custom_command(
+  OUTPUT "${output_file}"
+  COMMAND ${CMAKE_COMMAND}
+    "-DBIN_TO_C_COMMAND=${bin_to_c}"
+    "-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>"
+    "-DOUTPUT=${output_file}"
+    -P ${CMAKE_CURRENT_SOURCE_DIR}/bin2c_wrapper.cmake
+  VERBATIM
+  DEPENDS $<TARGET_OBJECTS:CudaPTX>
+  COMMENT "Converting Object files to a C header"
+  )
+
+add_executable(CudaOnlyExportPTX main.cu ${output_file})
+add_dependencies(CudaOnlyExportPTX CudaPTX)
+target_include_directories(CudaOnlyExportPTX PRIVATE
+                           ${CMAKE_CURRENT_BINARY_DIR} )
+target_compile_definitions(CudaOnlyExportPTX PRIVATE
+                           "CONFIG_TYPE=gen_$<LOWER_CASE:$<CONFIG>>")
+
+if(APPLE)
+  # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+  # the static cuda runtime can find it at runtime.
+  target_link_libraries(CudaOnlyExportPTX PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()
+
+#Verify that we can install object targets properly
+install(TARGETS CudaPTX CudaOnlyExportPTX
+   EXPORT cudaPTX
+   RUNTIME DESTINATION bin
+   LIBRARY DESTINATION lib
+   OBJECTS DESTINATION objs
+   )
+install(EXPORT cudaPTX DESTINATION lib/cudaPTX)
diff --git a/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake b/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake
new file mode 100644
index 000000000..0baf93434
--- /dev/null
+++ b/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake
@@ -0,0 +1,19 @@
+
+set(file_contents)
+foreach(obj ${OBJECTS})
+  get_filename_component(obj_ext ${obj} EXT)
+  get_filename_component(obj_name ${obj} NAME_WE)
+  get_filename_component(obj_dir ${obj} DIRECTORY)
+
+  if(obj_ext MATCHES ".ptx")
+    set(args --name ${obj_name} ${obj})
+    execute_process(COMMAND "${BIN_TO_C_COMMAND}" ${args}
+                    WORKING_DIRECTORY ${obj_dir}
+                    RESULT_VARIABLE result
+                    OUTPUT_VARIABLE output
+                    ERROR_VARIABLE error_var
+                    )
+    set(file_contents "${file_contents} \n${output}")
+  endif()
+endforeach()
+file(WRITE "${OUTPUT}" "${file_contents}")
diff --git a/Tests/CudaOnly/ExportPTX/kernelA.cu b/Tests/CudaOnly/ExportPTX/kernelA.cu
new file mode 100644
index 000000000..fbe0d26d7
--- /dev/null
+++ b/Tests/CudaOnly/ExportPTX/kernelA.cu
@@ -0,0 +1,7 @@
+
+__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
+{
+  for (int i = threadIdx.x; i < size; i += blockDim.x) {
+    r[i] = x[i] * y[i] + z[i];
+  }
+}
diff --git a/Tests/CudaOnly/ExportPTX/kernelB.cu b/Tests/CudaOnly/ExportPTX/kernelB.cu
new file mode 100644
index 000000000..11872e4c2
--- /dev/null
+++ b/Tests/CudaOnly/ExportPTX/kernelB.cu
@@ -0,0 +1,8 @@
+
+
+__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
+{
+  for (int i = threadIdx.x; i < size; i += blockDim.x) {
+    r[i] = x[i] * y[i] + z[i];
+  }
+}
diff --git a/Tests/CudaOnly/ExportPTX/main.cu b/Tests/CudaOnly/ExportPTX/main.cu
new file mode 100644
index 000000000..132377c42
--- /dev/null
+++ b/Tests/CudaOnly/ExportPTX/main.cu
@@ -0,0 +1,28 @@
+
+#include <iostream>
+
+/*
+  Define GENERATED_HEADER macro to allow c++ files to include headers
+  generated based on different configuration types.
+*/
+
+/* clang-format off */
+#define GENERATED_HEADER(x) GENERATED_HEADER0(CONFIG_TYPE/x)
+/* clang-format on */
+#define GENERATED_HEADER0(x) GENERATED_HEADER1(x)
+#define GENERATED_HEADER1(x) <x>
+
+#include GENERATED_HEADER(path_to_objs.h)
+
+#include "embedded_objs.h"
+
+int main(int argc, char** argv)
+{
+  (void)argc;
+  (void)argv;
+
+  unsigned char* ka = kernelA;
+  unsigned char* kb = kernelB;
+
+  return (ka != NULL && kb != NULL) ? 0 : 1;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
new file mode 100644
index 000000000..b96bb984b
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
@@ -0,0 +1,52 @@
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyResolveDeviceSymbols CUDA)
+
+# Find nm and dumpbin
+if(CMAKE_NM)
+  set(dump_command ${CMAKE_NM})
+  set(dump_args -g)
+else()
+  include(GetPrerequisites)
+  message(STATUS "calling list_prerequisites to find dumpbin")
+  list_prerequisites("${CMAKE_COMMAND}" 0 0 0)
+  if(gp_dumpbin)
+    set(dump_command ${gp_dumpbin})
+    set(dump_args /ARCHIVEMEMBERS)
+  endif()
+endif()
+
+#Goal for this example:
+#Build a static library that defines multiple methods and kernels that
+#use each other.
+#Use a custom command to build an executable that uses this static library
+#We do these together to verify that we can get a static library to do
+#device symbol linking, and not have it done when the executable is made
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+
+add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu)
+set_target_properties(CUDAResolveDeviceLib
+                      PROPERTIES
+                      CUDA_SEPARABLE_COMPILATION ON
+                      CUDA_RESOLVE_DEVICE_SYMBOLS ON
+                      POSITION_INDEPENDENT_CODE ON)
+
+if(dump_command)
+add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD
+  COMMAND ${CMAKE_COMMAND}
+  -DDUMP_COMMAND=${dump_command}
+  -DDUMP_ARGS=${dump_args}
+  -DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDAResolveDeviceLib>
+  -P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake
+  )
+endif()
+
+add_executable(CudaOnlyResolveDeviceSymbols main.cu)
+target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib)
+
+if(APPLE)
+  # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+  # the static cuda runtime can find it at runtime.
+  target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu
new file mode 100644
index 000000000..1ce63bf98
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu
@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+  result_type r;
+  r.input = x;
+  r.sum = x * x;
+  return r;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
new file mode 100644
index 000000000..ff1945c0a
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+  int input;
+  int sum;
+};
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
new file mode 100644
index 000000000..278fd6cce
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
@@ -0,0 +1,25 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+  const result_type r = file1_func(x);
+  const result_type_dynamic rd{ r.input, r.sum, true };
+  return rd;
+}
+
+static __global__ void file2_kernel(result_type_dynamic& r, int x)
+{
+  // call static_func which is a method that is defined in the
+  // static library that is always out of date
+  r = file2_func(x);
+}
+
+int file2_launch_kernel(int x)
+{
+  result_type_dynamic r;
+  file2_kernel<<<1, 1>>>(r, x);
+  return r.sum;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
new file mode 100644
index 000000000..d2dbaa4de
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+  int input;
+  int sum;
+  bool from_static;
+};
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
new file mode 100644
index 000000000..b4b5b9eac
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
@@ -0,0 +1,85 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+int file2_launch_kernel(int x);
+
+result_type_dynamic __device__ file2_func(int x);
+static __global__ void main_kernel(result_type_dynamic& r, int x)
+{
+  // call function that was not device linked to us, this will cause
+  // a runtime failure of "invalid device function"
+  r = file2_func(x);
+}
+
+int main_launch_kernel(int x)
+{
+  result_type_dynamic r;
+  main_kernel<<<1, 1>>>(r, x);
+  return r.sum;
+}
+
+int choose_cuda_device()
+{
+  int nDevices = 0;
+  cudaError_t err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+              << std::endl;
+    return 1;
+  }
+  for (int i = 0; i < nDevices; ++i) {
+    cudaDeviceProp prop;
+    cudaError_t err = cudaGetDeviceProperties(&prop, i);
+    if (err != cudaSuccess) {
+      std::cerr << "Could not retrieve properties from CUDA device " << i
+                << std::endl;
+      return 1;
+    }
+    std::cout << "prop.major: " << prop.major << std::endl;
+    if (prop.major >= 3) {
+      err = cudaSetDevice(i);
+      if (err != cudaSuccess) {
+        std::cout << "Could not select CUDA device " << i << std::endl;
+      } else {
+        return 0;
+      }
+    }
+  }
+
+  std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+            << std::endl;
+
+  return 1;
+}
+
+int main(int argc, char** argv)
+{
+  int ret = choose_cuda_device();
+  if (ret) {
+    return 0;
+  }
+
+  cudaError_t err;
+  file2_launch_kernel(42);
+  err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    std::cerr << "file2_launch_kernel: kernel launch failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return 1;
+  }
+
+  main_launch_kernel(1);
+  err = cudaGetLastError();
+  if (err == cudaSuccess) {
+    // This kernel launch should fail as the file2_func was device linked
+    // into the static library and is not usable by the executable
+    std::cerr << "main_launch_kernel: kernel launch should have failed"
+              << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake
new file mode 100644
index 000000000..94d388b10
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake
@@ -0,0 +1,14 @@
+execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH}
+  RESULT_VARIABLE RESULT
+  OUTPUT_VARIABLE OUTPUT
+  ERROR_VARIABLE ERROR
+)
+
+if(NOT "${RESULT}" STREQUAL "0")
+  message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]")
+endif()
+
+if(NOT "${OUTPUT}" MATCHES "(cmake_device_link|device-link)")
+  message(FATAL_ERROR
+    "No cuda device objects found, device linking did not occur")
+endif()
diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
new file mode 100644
index 000000000..3d4a17073
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
@@ -0,0 +1,55 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlySeparateCompilation CUDA)
+
+#Goal for this example:
+#Build a static library that defines multiple methods and kernels that
+#use each other.
+#After that confirm that we can call those methods from dynamic libraries
+#and executables.
+#We complicate the matter by also testing that multiple static libraries
+#all containing cuda separable compilation code links properly
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+
+add_library(CUDASeparateLibA STATIC file1.cu file2.cu file3.cu)
+
+if(CMAKE_CUDA_SIMULATE_ID STREQUAL "MSVC")
+  # Test adding a flag that is not in our CUDA flag table for VS.
+  if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 8)
+    string(APPEND CMAKE_CUDA_FLAGS " --ftemplate-depth 50")
+  endif()
+  # Test adding a flag that nvcc should pass to the host compiler.
+  target_compile_options(CUDASeparateLibA PRIVATE -Xcompiler=-bigobj)
+endif()
+
+#Having file4/file5 in a shared library causes serious problems
+#with the nvcc linker and it will generate bad entries that will
+#cause a segv when trying to run the executable
+#
+add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
+target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
+
+add_executable(CudaOnlySeparateCompilation main.cu)
+target_link_libraries(CudaOnlySeparateCompilation
+                      PRIVATE CUDASeparateLibB)
+
+set_target_properties(CUDASeparateLibA
+                      CUDASeparateLibB
+                      PROPERTIES CUDA_SEPARABLE_COMPILATION ON
+                      POSITION_INDEPENDENT_CODE ON)
+
+if (CMAKE_GENERATOR MATCHES "^Visual Studio")
+  #Visual Studio CUDA integration will not perform device linking
+  #on a target that itself does not have GenerateRelocatableDeviceCode
+  #enabled.
+  set_target_properties(CudaOnlySeparateCompilation
+                        PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+endif()
+
+if (APPLE)
+  # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+  # the static cuda runtime can find it at runtime.
+  target_link_libraries(CudaOnlySeparateCompilation PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()
diff --git a/Tests/CudaOnly/SeparateCompilation/file1.cu b/Tests/CudaOnly/SeparateCompilation/file1.cu
new file mode 100644
index 000000000..1ce63bf98
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file1.cu
@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+  result_type r;
+  r.input = x;
+  r.sum = x * x;
+  return r;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file1.h b/Tests/CudaOnly/SeparateCompilation/file1.h
new file mode 100644
index 000000000..ff1945c0a
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file1.h
@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+  int input;
+  int sum;
+};
diff --git a/Tests/CudaOnly/SeparateCompilation/file2.cu b/Tests/CudaOnly/SeparateCompilation/file2.cu
new file mode 100644
index 000000000..74f35580d
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file2.cu
@@ -0,0 +1,16 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+  if (x != 42) {
+    const result_type r = file1_func(x);
+    const result_type_dynamic rd{ r.input, r.sum, true };
+    return rd;
+  } else {
+    const result_type_dynamic rd{ x, x * x * x, false };
+    return rd;
+  }
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file2.h b/Tests/CudaOnly/SeparateCompilation/file2.h
new file mode 100644
index 000000000..d2dbaa4de
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file2.h
@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+  int input;
+  int sum;
+  bool from_static;
+};
diff --git a/Tests/CudaOnly/SeparateCompilation/file3.cu b/Tests/CudaOnly/SeparateCompilation/file3.cu
new file mode 100644
index 000000000..155b513bb
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file3.cu
@@ -0,0 +1,22 @@
+
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static __global__ void file3_kernel(result_type& r, int x)
+{
+  // call static_func which is a method that is defined in the
+  // static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+result_type file3_launch_kernel(int x)
+{
+  result_type r;
+  file3_kernel<<<1, 1>>>(r, x);
+  return r;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file4.cu b/Tests/CudaOnly/SeparateCompilation/file4.cu
new file mode 100644
index 000000000..2e3e01e32
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file4.cu
@@ -0,0 +1,23 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static __global__ void file4_kernel(result_type& r, int x)
+{
+  // call static_func which is a method that is defined in the
+  // static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file4_launch_kernel(int x)
+{
+  result_type r;
+  file4_kernel<<<1, 1>>>(r, x);
+  return r.sum;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file5.cu b/Tests/CudaOnly/SeparateCompilation/file5.cu
new file mode 100644
index 000000000..fee8e9e10
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file5.cu
@@ -0,0 +1,23 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static __global__ void file5_kernel(result_type& r, int x)
+{
+  // call static_func which is a method that is defined in the
+  // static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file5_launch_kernel(int x)
+{
+  result_type r;
+  file5_kernel<<<1, 1>>>(r, x);
+  return r.sum;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main.cu
new file mode 100644
index 000000000..40dbe5d7e
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/main.cu
@@ -0,0 +1,68 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+int file4_launch_kernel(int x);
+int file5_launch_kernel(int x);
+
+int choose_cuda_device()
+{
+  int nDevices = 0;
+  cudaError_t err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+              << std::endl;
+    return 1;
+  }
+  for (int i = 0; i < nDevices; ++i) {
+    cudaDeviceProp prop;
+    cudaError_t err = cudaGetDeviceProperties(&prop, i);
+    if (err != cudaSuccess) {
+      std::cerr << "Could not retrieve properties from CUDA device " << i
+                << std::endl;
+      return 1;
+    }
+    if (prop.major >= 3) {
+      err = cudaSetDevice(i);
+      if (err != cudaSuccess) {
+        std::cout << "Could not select CUDA device " << i << std::endl;
+      } else {
+        return 0;
+      }
+    }
+  }
+
+  std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+            << std::endl;
+
+  return 1;
+}
+
+int main(int argc, char** argv)
+{
+  int ret = choose_cuda_device();
+  if (ret) {
+    return 0;
+  }
+
+  cudaError_t err;
+  file4_launch_kernel(42);
+  err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    std::cerr << "file4_launch_kernel: kernel launch failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return 1;
+  }
+
+  file5_launch_kernel(42);
+  err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    std::cerr << "file5_launch_kernel: kernel launch failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/Tests/CudaOnly/WithDefs/CMakeLists.txt b/Tests/CudaOnly/WithDefs/CMakeLists.txt
new file mode 100644
index 000000000..e25f141ea
--- /dev/null
+++ b/Tests/CudaOnly/WithDefs/CMakeLists.txt
@@ -0,0 +1,44 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyWithDefs CUDA)
+
+#verify that we can pass explicit cuda arch flags
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+set(debug_compile_flags --generate-code arch=compute_20,code=sm_20)
+if(CMAKE_CUDA_SIMULATE_ID STREQUAL "MSVC")
+  list(APPEND debug_compile_flags -Xcompiler=-WX)
+else()
+  list(APPEND debug_compile_flags -Xcompiler=-Werror)
+endif()
+set(release_compile_defs DEFREL)
+
+#Goal for this example:
+#build a executable that needs to be passed a complex define through add_defintions
+#this verifies we can pass things such as '_','(' to nvcc
+add_definitions("-DPACKED_DEFINE=__attribute__((packed))")
+
+if(CMAKE_GENERATOR MATCHES "Visual Studio")
+  # CUDA MSBuild rules do not pass '-x cu' to nvcc
+  set(main main_for_vs.cu)
+else()
+  set(main main.notcu)
+  set_source_files_properties(main.notcu PROPERTIES LANGUAGE CUDA)
+endif()
+add_executable(CudaOnlyWithDefs ${main})
+
+target_compile_options(CudaOnlyWithDefs
+  PRIVATE
+    -Xcompiler=-DHOST_DEFINE
+    $<$<CONFIG:DEBUG>:$<BUILD_INTERFACE:${debug_compile_flags}>>
+  )
+
+target_compile_definitions(CudaOnlyWithDefs
+  PRIVATE
+    $<$<CONFIG:RELEASE>:$<BUILD_INTERFACE:${release_compile_defs}>>
+  )
+
+#we need to add an rpath for the cuda library so that everything
+#loads properly on the mac
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  set_target_properties(CudaOnlyWithDefs PROPERTIES LINK_FLAGS "-Wl,-rpath,${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}")
+endif()
diff --git a/Tests/CudaOnly/WithDefs/main.notcu b/Tests/CudaOnly/WithDefs/main.notcu
new file mode 100644
index 000000000..d2eff3f4b
--- /dev/null
+++ b/Tests/CudaOnly/WithDefs/main.notcu
@@ -0,0 +1,57 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+#ifndef HOST_DEFINE
+#error "HOST_DEFINE not defined!"
+#endif
+
+#ifndef PACKED_DEFINE
+#error "PACKED_DEFINE not defined!"
+#endif
+
+static __global__ void DetermineIfValidCudaDevice()
+{
+}
+
+#ifdef _MSC_VER
+#pragma pack(push, 1)
+#undef PACKED_DEFINE
+#define PACKED_DEFINE
+#endif
+struct PACKED_DEFINE result_type
+{
+  bool valid;
+  int value;
+#if defined(NDEBUG) && !defined(DEFREL)
+#error missing DEFREL flag
+#endif
+};
+#ifdef _MSC_VER
+#pragma pack(pop)
+#endif
+
+result_type can_launch_kernel()
+{
+  result_type r;
+  DetermineIfValidCudaDevice<<<1, 1>>>();
+  r.valid = (cudaSuccess == cudaGetLastError());
+  if (r.valid) {
+    r.value = 1;
+  } else {
+    r.value = -1;
+  }
+  return r;
+}
+
+int main(int argc, char** argv)
+{
+  cudaError_t err;
+  int nDevices = 0;
+  err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << cudaGetErrorString(err) << std::endl;
+    return 1;
+  }
+  return 0;
+}
diff --git a/Tests/CudaOnly/WithDefs/main_for_vs.cu b/Tests/CudaOnly/WithDefs/main_for_vs.cu
new file mode 100644
index 000000000..56078e7af
--- /dev/null
+++ b/Tests/CudaOnly/WithDefs/main_for_vs.cu
@@ -0,0 +1 @@
+#include "main.notcu"
author	MyungJoo Ham <myungjoo.ham@samsung.com>	2017-10-11 15:16:57 +0900
committer	MyungJoo Ham <myungjoo.ham@samsung.com>	2017-10-11 15:16:57 +0900
commit	915c76ded744c0f5f151402b9fa69f3fd8452573 (patch)
tree	ca6a387466543248890f346847acaa8343989b22 /Tests/CudaOnly
parent	317dbdb79761ef65e45c7358cfc7571c6afa54ad (diff)
download	cmake-915c76ded744c0f5f151402b9fa69f3fd8452573.tar.gz cmake-915c76ded744c0f5f151402b9fa69f3fd8452573.tar.bz2 cmake-915c76ded744c0f5f151402b9fa69f3fd8452573.zip