diff options
author | Grigory Arutyunov <arutyunovg@yandex.ru> | 2019-03-01 15:07:18 -0800 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2019-03-01 15:17:41 -0800 |
commit | 2336f0ba062d3c00117ccefc49c07bf2626284f1 (patch) | |
tree | c270f04ea3081fe394e69908bf68cd744c27285c /c10/macros | |
parent | 06c8aa7a3bbd91cda2fd6255ec82aad21fa1c0d5 (diff) | |
download | pytorch-2336f0ba062d3c00117ccefc49c07bf2626284f1.tar.gz pytorch-2336f0ba062d3c00117ccefc49c07bf2626284f1.tar.bz2 pytorch-2336f0ba062d3c00117ccefc49c07bf2626284f1.zip |
msvc_fixes (#17201)
Summary:
Fixing MSVC errors
```
D:\pytorch-scripts\caffe2_builders\v141\pytorch\aten\src\THC/THCReduce.cuh(144): error C4002: too many actual paramet
ers for macro 'C10_LAUNCH_BOUNDS_1' [D:\pytorch-scripts\caffe2_builders\v141\pytorch\build\Debug\caffe2\caffe2_gpu.vcxp
roj]
D:\pytorch-scripts\caffe2_builders\v141\pytorch\aten\src\THC/THCReduce.cuh(259): error C4002: too many actual paramet
ers for macro 'C10_LAUNCH_BOUNDS_1' [D:\pytorch-scripts\caffe2_builders\v141\pytorch\build\Debug\caffe2\caffe2_gpu.vcxp
roj]
D:/pytorch-scripts/caffe2_builders/v141/pytorch/aten/src/THCUNN/SpatialDilatedMaxPooling.cu(51): error C4002: too man
y actual parameters for macro 'C10_LAUNCH_BOUNDS_1' [D:\pytorch-scripts\caffe2_builders\v141\pytorch\build\Debug\caffe2
\caffe2_gpu.vcxproj]
```
on variadic C10_LAUNCH_BOUNDS as well as Debug linking issues with at::Half in pool_op_cudnn.cc like this one
```
pool_op_cudnn.obj : error LNK2019: unresolved external symbol "public: bool __cdecl caffe2::MaxPoolFunctor<class caff
e2::CUDAContext>::GlobalPoolingBackward<struct c10::Half,2>(int,int,int,struct c10::Half const *,struct c10::Half const
,struct c10::Half const ,struct c10::Half ,class caffe2::CUDAContext )const " (??$GlobalPoolingBackward@UHalf@c10@
@$01@?$MaxPoolFunctor@VCUDAContext@caffe2@@caffe2@QEBA_NHHHPEBUHalf@c10@00PEAU23@PEAVCUDAContext@1@Z) referenced in
function "public: bool __cdecl caffe2::`anonymous namespace'::CuDNNMaxPoolFunctor::GlobalPoolingBackward<struct c10::H
alf,2>(int,int,int,struct c10::Half const ,struct c10::Half const ,struct c10::Half const ,struct c10::Half ,class
caffe2::CUDAContext *)const " (??$GlobalPoolingBackward@UHalf@c10@@$01@CuDNNMaxPoolFunctor@?A0xb936404a@caffe2@QEBA_NH
HHPEBUHalf@c10@00PEAU34@PEAVCUDAContext@2@Z) [D:\pytorch-scripts\caffe2_builders\v141\pytorch\build\Debug\caffe2\caff
e2_gpu.vcxproj]
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17201
Differential Revision: D14165732
Pulled By: ezyang
fbshipit-source-id: 875fd9a5b2db6f83fc483f6d750d2c011260eb8b
Diffstat (limited to 'c10/macros')
-rw-r--r-- | c10/macros/Macros.h | 3 |
1 files changed, 0 insertions, 3 deletions
diff --git a/c10/macros/Macros.h b/c10/macros/Macros.h index 884abe6b3a..93aa14b3c2 100644 --- a/c10/macros/Macros.h +++ b/c10/macros/Macros.h @@ -143,12 +143,9 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256; #define C10_MAX_THREADS_PER_BLOCK(val) (((val) <= CUDA_MAX_THREADS_PER_BLOCK) ? (val) : CUDA_THREADS_PER_BLOCK_FALLBACK) #define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) ((((threads_per_block)*(blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) ? (blocks_per_sm) : ((CUDA_MAX_THREADS_PER_SM + (threads_per_block) - 1) / (threads_per_block)))) // C10_LAUNCH_BOUNDS is analogous to __launch_bounds__ -// https://stackoverflow.com/a/8814003 snippet to have macro with an optional argument #define C10_LAUNCH_BOUNDS_0 __launch_bounds__(256, 4) // default launch bounds that should give good occupancy and versatility across all architectures. #define C10_LAUNCH_BOUNDS_1(max_threads_per_block) __launch_bounds__((C10_MAX_THREADS_PER_BLOCK((max_threads_per_block)))) #define C10_LAUNCH_BOUNDS_2(max_threads_per_block, min_blocks_per_sm) __launch_bounds__((C10_MAX_THREADS_PER_BLOCK((max_threads_per_block))), (C10_MIN_BLOCKS_PER_SM((max_threads_per_block), (min_blocks_per_sm)))) -#define C10_LAUNCH_BOUNDS_X(x,max_threads_per_block,min_blocks_per_sm,FUNC, ...) FUNC -#define C10_LAUNCH_BOUNDS(...) C10_LAUNCH_BOUNDS_X(,##__VA_ARGS__, C10_LAUNCH_BOUNDS_2(__VA_ARGS__), C10_LAUNCH_BOUNDS_1(__VA_ARGS__), C10_LAUNCH_BOUNDS_0(__VA_ARGS__)) #else #define C10_HOST_DEVICE #define C10_HOST |