diff options
author | Yangqing Jia <jiayq84@gmail.com> | 2016-01-12 15:44:15 -0800 |
---|---|---|
committer | Yangqing Jia <jiayq84@gmail.com> | 2016-01-12 15:44:15 -0800 |
commit | d08880e61a1c360bda15f6ea5442e4833737ab37 (patch) | |
tree | d62405a723bba9fac55a13db7772e446b55d7740 /caffe2/cuda_rtc/common_rtc.h | |
parent | fe78d1a44514d4683cd52a2bdeb2b177e9e297df (diff) | |
download | pytorch-d08880e61a1c360bda15f6ea5442e4833737ab37.tar.gz pytorch-d08880e61a1c360bda15f6ea5442e4833737ab37.tar.bz2 pytorch-d08880e61a1c360bda15f6ea5442e4833737ab37.zip |
more RTC experiments
Diffstat (limited to 'caffe2/cuda_rtc/common_rtc.h')
-rw-r--r-- | caffe2/cuda_rtc/common_rtc.h | 9 |
1 files changed, 5 insertions, 4 deletions
diff --git a/caffe2/cuda_rtc/common_rtc.h b/caffe2/cuda_rtc/common_rtc.h
index baaab06de0..665bab35cb 100644
--- a/caffe2/cuda_rtc/common_rtc.h
+++ b/caffe2/cuda_rtc/common_rtc.h
@@ -41,10 +41,11 @@ class CudaRTCFunction {
     // Compile the program.
     // TODO(Yangqing): how to find the current gpu architecture instead of hard
     // coding it?
-    //const char *nvrtc_opts[] = {"--gpu-architecture=compute_30",
-    //                            "--fmad=false"};
+    CAFFE_LOG_ERROR << "NVRTC version: " << major << minor;
+    const char *nvrtc_opts[] = {"--gpu-architecture=compute_35",
+                                "--use_fast_math"};
     nvrtcResult compile_result = nvrtcCompileProgram(
-        prog, 0, nullptr);
+        prog, 2, nvrtc_opts);
     if (compile_result != NVRTC_SUCCESS) {
       size_t log_size;
       NVRTC_CHECK(nvrtcGetProgramLogSize(prog, &log_size));
@@ -103,4 +104,4 @@ inline string GetUniqueName() {
 
 }  // namepsace caffe2
 
-#endif  // CAFFE2_CUDA_RTC_COMMON_RTC_H_
\ No newline at end of file
+#endif  // CAFFE2_CUDA_RTC_COMMON_RTC_H_