diff options
author | Jerry Zhang <jerryzh@fb.com> | 2018-10-09 10:47:24 -0700 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-10-09 10:53:52 -0700 |
commit | 1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f (patch) | |
tree | ff1ba37b7ca985c59274faef79e979d3bb83d4a6 | |
parent | f564163951b15a14f777f01f0caf1b8ce64d3f00 (diff) | |
download | pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.tar.gz pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.tar.bz2 pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.zip |
Remove New with Allocator Registry (#12111)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/12111
Set up an allocator registry keyed by at::DeviceType, and remove New from StaticContext.
Reviewed By: ezyang
Differential Revision: D10022853
fbshipit-source-id: 3e88a181fe5df24f33f49b88be1f75284a185588
-rw-r--r-- | aten/src/ATen/core/Allocator.cpp | 17 | ||||
-rw-r--r-- | aten/src/ATen/core/Allocator.h | 28 | ||||
-rw-r--r-- | aten/src/ATen/core/TensorImpl.h | 7 | ||||
-rw-r--r-- | aten/src/ATen/core/context_base.h | 4 | ||||
-rw-r--r-- | caffe2/core/allocator.cc | 12 | ||||
-rw-r--r-- | caffe2/core/context.h | 5 | ||||
-rw-r--r-- | caffe2/core/context_gpu.cu | 17 | ||||
-rw-r--r-- | caffe2/core/context_gpu.h | 7 | ||||
-rw-r--r-- | caffe2/core/hip/context_hip.cc | 15 | ||||
-rw-r--r-- | caffe2/core/hip/context_hip.h | 7 | ||||
-rw-r--r-- | caffe2/ideep/utils/ideep_context.h | 6 | ||||
-rw-r--r-- | caffe2/mkl/utils/mkl_context.h | 6 |
12 files changed, 75 insertions, 56 deletions
diff --git a/aten/src/ATen/core/Allocator.cpp b/aten/src/ATen/core/Allocator.cpp index d6c07cecc8..3a958cc277 100644 --- a/aten/src/ATen/core/Allocator.cpp +++ b/aten/src/ATen/core/Allocator.cpp @@ -17,3 +17,20 @@ at::DataPtr InefficientStdFunctionContext::makeDataPtr( } } // namespace at + +namespace caffe2 { + +CAFFE2_API at::Allocator* allocator_array[static_cast<int>( + at::DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)]; + +void SetAllocator(at::DeviceType t, at::Allocator* alloc) { + allocator_array[static_cast<int>(t)] = alloc; +} + +at::Allocator* GetAllocator(const at::DeviceType& t) { + auto* alloc = allocator_array[static_cast<int>(t)]; + AT_ASSERTM(alloc, "Allocator for ", t, " is not set."); + return alloc; +} + +} // namespace caffe2 diff --git a/aten/src/ATen/core/Allocator.h b/aten/src/ATen/core/Allocator.h index c8c8a236fb..239698901c 100644 --- a/aten/src/ATen/core/Allocator.h +++ b/aten/src/ATen/core/Allocator.h @@ -133,3 +133,31 @@ struct CAFFE2_API InefficientStdFunctionContext { }; } // namespace at + +namespace caffe2 { + +/** Set the allocator for DeviceType `t`. The passed in allocator pointer is + * expected to have static lifetime; this function does NOT take ownership + * of the raw pointer. (The reason for this is to prevent existing pointers + * to an allocator of a particular device from being invalidated when + * SetAllocator is called.) + * + * Also note that this is not thraed-safe, and we assume this function will + * only be called during initialization. 
+ */ +CAFFE2_API void SetAllocator(at::DeviceType t, at::Allocator* alloc); +CAFFE2_API at::Allocator* GetAllocator(const at::DeviceType& t); + +template <at::DeviceType t> +struct AllocatorRegisterer { + explicit AllocatorRegisterer(at::Allocator* alloc) { + SetAllocator(t, alloc); + } +}; + +#define REGISTER_ALLOCATOR(t, f) \ + namespace { \ + static AllocatorRegisterer<t> g_allocator_##d(f); \ + } + +} // namespace caffe2 diff --git a/aten/src/ATen/core/TensorImpl.h b/aten/src/ATen/core/TensorImpl.h index fa409a1e4c..54e8b5ba5d 100644 --- a/aten/src/ATen/core/TensorImpl.h +++ b/aten/src/ATen/core/TensorImpl.h @@ -11,7 +11,7 @@ #include <ATen/core/context_base.h> #include <ATen/core/optional.h> -#include "c10/util/Flags.h" +#include <c10/util/Flags.h> #include "caffe2/core/allocator.h" #include "caffe2/core/common.h" @@ -765,13 +765,14 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { CAFFE_ENFORCE( allocator == nullptr, "Allocator is not used within Caffe2 functions, please use StaticContext instead."); + allocator = caffe2::GetAllocator(storage_.device_type()); if (meta.ctor()) { // For types that need placement new, we will call it, as well as // making sure that when the data is freed, it calls the right // destruction procedure. auto size = numel_; auto dtor = data_type_.dtor(); - auto data_ptr = GetStaticContext()->New( + auto data_ptr = allocator->allocate( numel_ * storage_.itemsize()); // Removing this can get rid of // InefficientStdFunctionContext storage_.set_data_ptr(PlacementDeleteContext::makeDataPtr( @@ -783,7 +784,7 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { } else { // For fundamental type, new and delete is easier. 
storage_.set_data_ptr( - GetStaticContext()->New(numel_ * storage_.itemsize())); + allocator->allocate(numel_ * storage_.itemsize())); } storage_.set_numel(numel_); AT_ASSERT(storage_offset_ == 0); // because we just reallocated diff --git a/aten/src/ATen/core/context_base.h b/aten/src/ATen/core/context_base.h index 1aeec94ab1..7d25ebea88 100644 --- a/aten/src/ATen/core/context_base.h +++ b/aten/src/ATen/core/context_base.h @@ -8,9 +8,7 @@ #include <ATen/core/ATenGeneral.h> #include <ATen/core/Allocator.h> -#include <ATen/core/Device.h> #include <ATen/core/Error.h> -#include <ATen/core/UniqueVoidPtr.h> #include <ATen/core/typeid.h> #include <c10/util/Registry.h> @@ -31,8 +29,6 @@ class CAFFE2_API BaseStaticContext { public: virtual ~BaseStaticContext() noexcept {} - virtual at::DataPtr New(size_t nbytes) const = 0; - virtual DeviceType GetDeviceType() = 0; /* diff --git a/caffe2/core/allocator.cc b/caffe2/core/allocator.cc index 4155daefe8..91c3bdfd3d 100644 --- a/caffe2/core/allocator.cc +++ b/caffe2/core/allocator.cc @@ -1,3 +1,4 @@ +#include <ATen/core/Allocator.h> #include "caffe2/core/context.h" #include "caffe2/core/logging.h" #include "caffe2/core/typeid.h" @@ -16,16 +17,19 @@ namespace caffe2 { void NoDelete(void*) {} -static std::unique_ptr<at::Allocator> g_cpu_allocator( - new DefaultCPUAllocator()); at::Allocator* GetCPUAllocator() { - return g_cpu_allocator.get(); + return GetAllocator(CPU); } void SetCPUAllocator(at::Allocator* alloc) { - g_cpu_allocator.reset(alloc); + SetAllocator(CPU, alloc); } +// Global default CPU Allocator +static DefaultCPUAllocator g_cpu_alloc; + +REGISTER_ALLOCATOR(CPU, &g_cpu_alloc); + MemoryAllocationReporter DefaultCPUAllocator::reporter_; void MemoryAllocationReporter::New(void* ptr, size_t nbytes) { diff --git a/caffe2/core/context.h b/caffe2/core/context.h index c81059b55e..2f8724b799 100644 --- a/caffe2/core/context.h +++ b/caffe2/core/context.h @@ -86,7 +86,7 @@ class CAFFE2_API CPUContext final : public BaseContext { 
} inline static at::DataPtr New(size_t nbytes) { - return StaticContext()->New(nbytes); + return GetCPUAllocator()->allocate(nbytes); } void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override { @@ -185,9 +185,6 @@ inline void CPUContext::CopyBytes<CPUContext, CPUContext>( // TODO(jerryzh): merge CPUStaticContext with Allocator class CAFFE2_API CPUStaticContext : public BaseStaticContext { public: - at::DataPtr New(size_t nbytes) const override { - return GetCPUAllocator()->allocate(nbytes); - } DeviceType GetDeviceType() override { return CPU; diff --git a/caffe2/core/context_gpu.cu b/caffe2/core/context_gpu.cu index a6409d9933..e846560c49 100644 --- a/caffe2/core/context_gpu.cu +++ b/caffe2/core/context_gpu.cu @@ -209,6 +209,8 @@ static void Caffe2SetCUDAMemoryPool() { } } +static PinnedCPUAllocator g_pinned_cpu_alloc; + // An initialization function that sets the CPU side to use pinned cpu // allocator. void Caffe2UsePinnedCPUAllocator() { @@ -226,7 +228,7 @@ void Caffe2UsePinnedCPUAllocator() { return; } VLOG(1) << "Caffe2 gpu: setting CPUAllocator to PinnedCPUAllocator."; - SetCPUAllocator(new PinnedCPUAllocator()); + SetCPUAllocator(&g_pinned_cpu_alloc); #endif } @@ -323,11 +325,6 @@ void TrackMemoryAlloc(size_t nbytes) { } } -// TODO: wrap this function in DefaultCUDAAllocator -at::DataPtr CUDAStaticContext::New(size_t nbytes) const { - return GetCUDAAllocator()->allocate(nbytes); -} - struct DefaultCUDAAllocator final : public at::Allocator { DefaultCUDAAllocator() {} ~DefaultCUDAAllocator() override {} @@ -427,12 +424,14 @@ struct DefaultCUDAAllocator final : public at::Allocator { } }; -static std::unique_ptr<at::Allocator> g_cuda_allocator( - new DefaultCUDAAllocator()); at::Allocator* GetCUDAAllocator() { - return g_cuda_allocator.get(); + return GetAllocator(CUDA); } +static DefaultCUDAAllocator g_cuda_alloc; + +REGISTER_ALLOCATOR(CUDA, &g_cuda_alloc); + BaseStaticContext* GetCUDAStaticContext() { static CUDAStaticContext context; 
return &context; diff --git a/caffe2/core/context_gpu.h b/caffe2/core/context_gpu.h index 79997941f9..9882d41392 100644 --- a/caffe2/core/context_gpu.h +++ b/caffe2/core/context_gpu.h @@ -224,7 +224,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext { } inline static at::DataPtr New(size_t nbytes) { - return StaticContext()->New(nbytes); + return GetAllocator(CUDA)->allocate(nbytes); } // Get a mutex to lock out cudaMalloc / cudaFree calls when @@ -387,8 +387,6 @@ struct CAFFE2_CUDA_API PinnedCPUAllocator final : public at::Allocator { class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext { public: - at::DataPtr New(size_t nbytes) const override; - DeviceType GetDeviceType() override { return CUDA; } @@ -401,9 +399,6 @@ class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext { }; -// Get the CUDA Alloctor. -CAFFE2_API at::Allocator* GetCUDAAllocator(); - using TensorCUDA = Tensor; } // namespace caffe2 diff --git a/caffe2/core/hip/context_hip.cc b/caffe2/core/hip/context_hip.cc index 3ee4ce6035..7e5e49b4a3 100644 --- a/caffe2/core/hip/context_hip.cc +++ b/caffe2/core/hip/context_hip.cc @@ -203,6 +203,8 @@ static void Caffe2SetHIPMemoryPool() } } +static PinnedCPUAllocator g_pinned_cpu_alloc; + // An initialization function that sets the CPU side to use pinned cpu // allocator. 
void Caffe2UsePinnedCPUAllocator() @@ -222,7 +224,7 @@ void Caffe2UsePinnedCPUAllocator() return; } VLOG(1) << "Caffe2 gpu: setting CPUAllocator to PinnedCPUAllocator."; - SetCPUAllocator(new PinnedCPUAllocator()); + SetCPUAllocator(&g_pinned_cpu_alloc); #endif } @@ -326,10 +328,6 @@ void TrackMemoryAlloc(size_t nbytes) } } -at::DataPtr HIPStaticContext::New(size_t nbytes) const { - return GetHIPAllocator()->allocate(nbytes); -} - struct DefaultHIPAllocator final : public at::Allocator { DefaultHIPAllocator() {} ~DefaultHIPAllocator() override {} @@ -429,11 +427,8 @@ struct DefaultHIPAllocator final : public at::Allocator { } }; -static std::unique_ptr<at::Allocator> g_hip_allocator( - new DefaultHIPAllocator()); -at::Allocator* GetHIPAllocator() { - return g_hip_allocator.get(); -} +static DefaultHIPAllocator g_hip_alloc; +REGISTER_ALLOCATOR(HIP, &g_hip_alloc); BaseStaticContext* GetHIPStaticContext() { static HIPStaticContext context; diff --git a/caffe2/core/hip/context_hip.h b/caffe2/core/hip/context_hip.h index 2eb01f2bce..b2fa347144 100644 --- a/caffe2/core/hip/context_hip.h +++ b/caffe2/core/hip/context_hip.h @@ -207,7 +207,7 @@ class HIPContext final : public BaseContext { } static at::DataPtr New(size_t nbytes) { - return StaticContext()->New(nbytes); + return GetAllocator(HIP)->allocate(nbytes); } // Get a mutex to lock out hipMalloc / hipFree calls when @@ -376,8 +376,6 @@ struct PinnedCPUAllocator final : public at::Allocator { class HIPStaticContext final : public BaseStaticContext { public: - at::DataPtr New(size_t nbytes) const override; - DeviceType GetDeviceType() override { return HIP; } @@ -389,9 +387,6 @@ class HIPStaticContext final : public BaseStaticContext { }; -// Get the HIP Alloctor. 
-CAFFE2_API at::Allocator* GetHIPAllocator(); - typedef Tensor TensorHIP; } // namespace caffe2 diff --git a/caffe2/ideep/utils/ideep_context.h b/caffe2/ideep/utils/ideep_context.h index b5c702ea3d..11bf97ee44 100644 --- a/caffe2/ideep/utils/ideep_context.h +++ b/caffe2/ideep/utils/ideep_context.h @@ -56,7 +56,7 @@ class IDEEPContext final : public BaseContext { } inline static at::DataPtr New(size_t nbytes) { - return StaticContext()->New(nbytes); + return GetAllocator(CPU)->allocate(nbytes); } void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override { @@ -176,10 +176,6 @@ inline void IDEEPContext::CopyBytes<IDEEPContext, CPUContext>( class IDEEPStaticContext : public BaseStaticContext { public: - inline at::DataPtr New(size_t nbytes) const override { - return GetCPUAllocator()->allocate(nbytes); - } - DeviceType GetDeviceType() override { return IDEEP; } diff --git a/caffe2/mkl/utils/mkl_context.h b/caffe2/mkl/utils/mkl_context.h index 7735283b6e..a010b04de3 100644 --- a/caffe2/mkl/utils/mkl_context.h +++ b/caffe2/mkl/utils/mkl_context.h @@ -63,7 +63,7 @@ class MKLContext : public BaseContext { } inline static at::DataPtr New(size_t nbytes) { - return StaticContext()->New(nbytes); + return GetAllocator(CPU)->allocate(nbytes); } void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override { @@ -153,10 +153,6 @@ inline void MKLContext::CopyBytes<MKLContext, MKLContext>( class MKLStaticContext : public BaseStaticContext { public: - inline at::DataPtr New(size_t nbytes) const override { - return GetCPUAllocator()->allocate(nbytes); - } - DeviceType GetDeviceType() override { return MKLDNN; } |