summaryrefslogtreecommitdiff
path: root/caffe2/cuda_rtc/common_rtc.h
diff options
context:
space:
mode:
author Yangqing Jia <jiayq84@gmail.com> 2016-01-12 15:44:15 -0800
committer Yangqing Jia <jiayq84@gmail.com> 2016-01-12 15:44:15 -0800
commit d08880e61a1c360bda15f6ea5442e4833737ab37 (patch)
tree d62405a723bba9fac55a13db7772e446b55d7740 /caffe2/cuda_rtc/common_rtc.h
parent fe78d1a44514d4683cd52a2bdeb2b177e9e297df (diff)
download pytorch-d08880e61a1c360bda15f6ea5442e4833737ab37.tar.gz
pytorch-d08880e61a1c360bda15f6ea5442e4833737ab37.tar.bz2
pytorch-d08880e61a1c360bda15f6ea5442e4833737ab37.zip
more RTC experiments
Diffstat (limited to 'caffe2/cuda_rtc/common_rtc.h')
-rw-r--r-- caffe2/cuda_rtc/common_rtc.h 9
1 files changed, 5 insertions, 4 deletions
diff --git a/caffe2/cuda_rtc/common_rtc.h b/caffe2/cuda_rtc/common_rtc.h
index baaab06de0..665bab35cb 100644
--- a/caffe2/cuda_rtc/common_rtc.h
+++ b/caffe2/cuda_rtc/common_rtc.h
@@ -41,10 +41,11 @@ class CudaRTCFunction {
// Compile the program.
// TODO(Yangqing): how to find the current gpu architecture instead of hard
// coding it?
- //const char *nvrtc_opts[] = {"--gpu-architecture=compute_30",
- // "--fmad=false"};
+ CAFFE_LOG_ERROR << "NVRTC version: " << major << minor;
+ const char *nvrtc_opts[] = {"--gpu-architecture=compute_35",
+ "--use_fast_math"};
nvrtcResult compile_result = nvrtcCompileProgram(
- prog, 0, nullptr);
+ prog, 2, nvrtc_opts);
if (compile_result != NVRTC_SUCCESS) {
size_t log_size;
NVRTC_CHECK(nvrtcGetProgramLogSize(prog, &log_size));
@@ -103,4 +104,4 @@ inline string GetUniqueName() {
} // namepsace caffe2
-#endif // CAFFE2_CUDA_RTC_COMMON_RTC_H_
\ No newline at end of file
+#endif // CAFFE2_CUDA_RTC_COMMON_RTC_H_