Remove New with Allocator Registry (#12111)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/12111

Set up an allocator registry keyed by at::DeviceType, and remove New from StaticContext.

Reviewed By: ezyang

Differential Revision: D10022853

fbshipit-source-id: 3e88a181fe5df24f33f49b88be1f75284a185588
author: Jerry Zhang <jerryzh@fb.com> 2018-10-09 10:47:24 -0700
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2018-10-09 10:53:52 -0700
commit: 1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f (patch)
tree: ff1ba37b7ca985c59274faef79e979d3bb83d4a6
parent: f564163951b15a14f777f01f0caf1b8ce64d3f00 (diff)
download: pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.tar.gz
pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.tar.bz2
pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.zip
12 files changed, 75 insertions, 56 deletions
diff --git a/aten/src/ATen/core/Allocator.cpp b/aten/src/ATen/core/Allocator.cpp
index d6c07cecc8..3a958cc277 100644
--- a/aten/src/ATen/core/Allocator.cpp
+++ b/aten/src/ATen/core/Allocator.cpp
@@ -17,3 +17,20 @@ at::DataPtr InefficientStdFunctionContext::makeDataPtr(
 }
 
 } // namespace at
+
+namespace caffe2 {
+
+CAFFE2_API at::Allocator* allocator_array[static_cast<int>(
+    at::DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)];
+
+void SetAllocator(at::DeviceType t, at::Allocator* alloc) {
+  allocator_array[static_cast<int>(t)] = alloc;
+}
+
+at::Allocator* GetAllocator(const at::DeviceType& t) {
+  auto* alloc = allocator_array[static_cast<int>(t)];
+  AT_ASSERTM(alloc, "Allocator for ", t, " is not set.");
+  return alloc;
+}
+
+} // namespace caffe2
diff --git a/aten/src/ATen/core/Allocator.h b/aten/src/ATen/core/Allocator.h
index c8c8a236fb..239698901c 100644
--- a/aten/src/ATen/core/Allocator.h
+++ b/aten/src/ATen/core/Allocator.h
@@ -133,3 +133,31 @@ struct CAFFE2_API InefficientStdFunctionContext {
 };
 
 } // namespace at
+
+namespace caffe2 {
+
+/** Set the allocator for DeviceType `t`. The passed in allocator pointer is
+ *  expected to have static lifetime; this function does NOT take ownership
+ *  of the raw pointer. (The reason for this is to prevent existing pointers
+ *  to an allocator of a particular device from being invalidated when
+ *  SetAllocator is called.)
+ *
+ *  Also note that this is not thread-safe, and we assume this function will
+ *  only be called during initialization.
+ */
+CAFFE2_API void SetAllocator(at::DeviceType t, at::Allocator* alloc);
+CAFFE2_API at::Allocator* GetAllocator(const at::DeviceType& t);
+
+template <at::DeviceType t>
+struct AllocatorRegisterer {
+  explicit AllocatorRegisterer(at::Allocator* alloc) {
+    SetAllocator(t, alloc);
+  }
+};
+
+#define REGISTER_ALLOCATOR(t, f)                    \
+  namespace {                                       \
+  static AllocatorRegisterer<t> g_allocator_##d(f); \
+  }
+
+} // namespace caffe2
diff --git a/aten/src/ATen/core/TensorImpl.h b/aten/src/ATen/core/TensorImpl.h
index fa409a1e4c..54e8b5ba5d 100644
--- a/aten/src/ATen/core/TensorImpl.h
+++ b/aten/src/ATen/core/TensorImpl.h
@@ -11,7 +11,7 @@
 #include <ATen/core/context_base.h>
 #include <ATen/core/optional.h>
 
-#include "c10/util/Flags.h"
+#include <c10/util/Flags.h>
 
 #include "caffe2/core/allocator.h"
 #include "caffe2/core/common.h"
@@ -765,13 +765,14 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
       CAFFE_ENFORCE(
           allocator == nullptr,
           "Allocator is not used within Caffe2 functions, please use StaticContext instead.");
+      allocator = caffe2::GetAllocator(storage_.device_type());
       if (meta.ctor()) {
         // For types that need placement new, we will call it, as well as
         // making sure that when the data is freed, it calls the right
         // destruction procedure.
         auto size = numel_;
         auto dtor = data_type_.dtor();
-        auto data_ptr = GetStaticContext()->New(
+        auto data_ptr = allocator->allocate(
             numel_ * storage_.itemsize()); // Removing this can get rid of
                                            // InefficientStdFunctionContext
         storage_.set_data_ptr(PlacementDeleteContext::makeDataPtr(
@@ -783,7 +784,7 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
       } else {
         // For fundamental type, new and delete is easier.
         storage_.set_data_ptr(
-            GetStaticContext()->New(numel_ * storage_.itemsize()));
+            allocator->allocate(numel_ * storage_.itemsize()));
       }
       storage_.set_numel(numel_);
       AT_ASSERT(storage_offset_ == 0); // because we just reallocated
diff --git a/aten/src/ATen/core/context_base.h b/aten/src/ATen/core/context_base.h
index 1aeec94ab1..7d25ebea88 100644
--- a/aten/src/ATen/core/context_base.h
+++ b/aten/src/ATen/core/context_base.h
@@ -8,9 +8,7 @@
 
 #include <ATen/core/ATenGeneral.h>
 #include <ATen/core/Allocator.h>
-#include <ATen/core/Device.h>
 #include <ATen/core/Error.h>
-#include <ATen/core/UniqueVoidPtr.h>
 #include <ATen/core/typeid.h>
 #include <c10/util/Registry.h>
 
@@ -31,8 +29,6 @@ class CAFFE2_API BaseStaticContext {
  public:
   virtual ~BaseStaticContext() noexcept {}
 
-  virtual at::DataPtr New(size_t nbytes) const = 0;
-
   virtual DeviceType GetDeviceType() = 0;
 
   /*
diff --git a/caffe2/core/allocator.cc b/caffe2/core/allocator.cc
index 4155daefe8..91c3bdfd3d 100644
--- a/caffe2/core/allocator.cc
+++ b/caffe2/core/allocator.cc
@@ -1,3 +1,4 @@
+#include <ATen/core/Allocator.h>
 #include "caffe2/core/context.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/core/typeid.h"
@@ -16,16 +17,19 @@ namespace caffe2 {
 
 void NoDelete(void*) {}
 
-static std::unique_ptr<at::Allocator> g_cpu_allocator(
-    new DefaultCPUAllocator());
 at::Allocator* GetCPUAllocator() {
-  return g_cpu_allocator.get();
+  return GetAllocator(CPU);
 }
 
 void SetCPUAllocator(at::Allocator* alloc) {
-  g_cpu_allocator.reset(alloc);
+  SetAllocator(CPU, alloc);
 }
 
+// Global default CPU Allocator
+static DefaultCPUAllocator g_cpu_alloc;
+
+REGISTER_ALLOCATOR(CPU, &g_cpu_alloc);
+
 MemoryAllocationReporter DefaultCPUAllocator::reporter_;
 
 void MemoryAllocationReporter::New(void* ptr, size_t nbytes) {
diff --git a/caffe2/core/context.h b/caffe2/core/context.h
index c81059b55e..2f8724b799 100644
--- a/caffe2/core/context.h
+++ b/caffe2/core/context.h
@@ -86,7 +86,7 @@ class CAFFE2_API CPUContext final : public BaseContext {
   }
 
   inline static at::DataPtr New(size_t nbytes) {
-    return StaticContext()->New(nbytes);
+    return GetCPUAllocator()->allocate(nbytes);
   }
 
   void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override {
@@ -185,9 +185,6 @@ inline void CPUContext::CopyBytes<CPUContext, CPUContext>(
 // TODO(jerryzh): merge CPUStaticContext with Allocator
 class CAFFE2_API CPUStaticContext : public BaseStaticContext {
  public:
-  at::DataPtr New(size_t nbytes) const override {
-    return GetCPUAllocator()->allocate(nbytes);
-  }
 
   DeviceType GetDeviceType() override {
     return CPU;
diff --git a/caffe2/core/context_gpu.cu b/caffe2/core/context_gpu.cu
index a6409d9933..e846560c49 100644
--- a/caffe2/core/context_gpu.cu
+++ b/caffe2/core/context_gpu.cu
@@ -209,6 +209,8 @@ static void Caffe2SetCUDAMemoryPool() {
   }
 }
 
+static PinnedCPUAllocator g_pinned_cpu_alloc;
+
 // An initialization function that sets the CPU side to use pinned cpu
 // allocator.
 void Caffe2UsePinnedCPUAllocator() {
@@ -226,7 +228,7 @@ void Caffe2UsePinnedCPUAllocator() {
     return;
   }
   VLOG(1) << "Caffe2 gpu: setting CPUAllocator to PinnedCPUAllocator.";
-  SetCPUAllocator(new PinnedCPUAllocator());
+  SetCPUAllocator(&g_pinned_cpu_alloc);
 #endif
 }
 
@@ -323,11 +325,6 @@ void TrackMemoryAlloc(size_t nbytes) {
 }
 }
 
-// TODO: wrap this function in DefaultCUDAAllocator
-at::DataPtr CUDAStaticContext::New(size_t nbytes) const {
-  return GetCUDAAllocator()->allocate(nbytes);
-}
-
 struct DefaultCUDAAllocator final : public at::Allocator {
   DefaultCUDAAllocator() {}
   ~DefaultCUDAAllocator() override {}
@@ -427,12 +424,14 @@ struct DefaultCUDAAllocator final : public at::Allocator {
   }
 };
 
-static std::unique_ptr<at::Allocator> g_cuda_allocator(
-    new DefaultCUDAAllocator());
 at::Allocator* GetCUDAAllocator() {
-  return g_cuda_allocator.get();
+  return GetAllocator(CUDA);
 }
 
+static DefaultCUDAAllocator g_cuda_alloc;
+
+REGISTER_ALLOCATOR(CUDA, &g_cuda_alloc);
+
 BaseStaticContext* GetCUDAStaticContext() {
   static CUDAStaticContext context;
   return &context;
diff --git a/caffe2/core/context_gpu.h b/caffe2/core/context_gpu.h
index 79997941f9..9882d41392 100644
--- a/caffe2/core/context_gpu.h
+++ b/caffe2/core/context_gpu.h
@@ -224,7 +224,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext {
   }
 
   inline static at::DataPtr New(size_t nbytes) {
-    return StaticContext()->New(nbytes);
+    return GetAllocator(CUDA)->allocate(nbytes);
   }
 
   // Get a mutex to lock out cudaMalloc / cudaFree calls when
@@ -387,8 +387,6 @@ struct CAFFE2_CUDA_API PinnedCPUAllocator final : public at::Allocator {
 
 class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext {
  public:
-  at::DataPtr New(size_t nbytes) const override;
-
   DeviceType GetDeviceType() override {
     return CUDA;
   }
@@ -401,9 +399,6 @@ class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext {
 
 };
 
-// Get the CUDA Alloctor.
-CAFFE2_API at::Allocator* GetCUDAAllocator();
-
 using TensorCUDA = Tensor;
 
 }  // namespace caffe2
diff --git a/caffe2/core/hip/context_hip.cc b/caffe2/core/hip/context_hip.cc
index 3ee4ce6035..7e5e49b4a3 100644
--- a/caffe2/core/hip/context_hip.cc
+++ b/caffe2/core/hip/context_hip.cc
@@ -203,6 +203,8 @@ static void Caffe2SetHIPMemoryPool()
   }
 }
 
+static PinnedCPUAllocator g_pinned_cpu_alloc;
+
 // An initialization function that sets the CPU side to use pinned cpu
 // allocator.
 void Caffe2UsePinnedCPUAllocator()
@@ -222,7 +224,7 @@ void Caffe2UsePinnedCPUAllocator()
         return;
     }
     VLOG(1) << "Caffe2 gpu: setting CPUAllocator to PinnedCPUAllocator.";
-    SetCPUAllocator(new PinnedCPUAllocator());
+    SetCPUAllocator(&g_pinned_cpu_alloc);
 #endif
 }
 
@@ -326,10 +328,6 @@ void TrackMemoryAlloc(size_t nbytes)
 }
 }
 
-at::DataPtr HIPStaticContext::New(size_t nbytes) const {
-  return GetHIPAllocator()->allocate(nbytes);
-}
-
 struct DefaultHIPAllocator final : public at::Allocator {
   DefaultHIPAllocator() {}
   ~DefaultHIPAllocator() override {}
@@ -429,11 +427,8 @@ struct DefaultHIPAllocator final : public at::Allocator {
   }
 };
 
-static std::unique_ptr<at::Allocator> g_hip_allocator(
-    new DefaultHIPAllocator());
-at::Allocator* GetHIPAllocator() {
-  return g_hip_allocator.get();
-}
+static DefaultHIPAllocator g_hip_alloc;
+REGISTER_ALLOCATOR(HIP, &g_hip_alloc);
 
 BaseStaticContext* GetHIPStaticContext() {
   static HIPStaticContext context;
diff --git a/caffe2/core/hip/context_hip.h b/caffe2/core/hip/context_hip.h
index 2eb01f2bce..b2fa347144 100644
--- a/caffe2/core/hip/context_hip.h
+++ b/caffe2/core/hip/context_hip.h
@@ -207,7 +207,7 @@ class HIPContext final : public BaseContext {
   }
 
   static at::DataPtr New(size_t nbytes) {
-    return StaticContext()->New(nbytes);
+    return GetAllocator(HIP)->allocate(nbytes);
   }
 
   // Get a mutex to lock out hipMalloc / hipFree calls when
@@ -376,8 +376,6 @@ struct PinnedCPUAllocator final : public at::Allocator {
 
 class HIPStaticContext final : public BaseStaticContext {
  public:
-  at::DataPtr New(size_t nbytes) const override;
-
   DeviceType GetDeviceType() override {
     return HIP;
   }
@@ -389,9 +387,6 @@ class HIPStaticContext final : public BaseStaticContext {
 
 };
 
-// Get the HIP Alloctor.
-CAFFE2_API at::Allocator* GetHIPAllocator();
-
 typedef Tensor TensorHIP;
 
 } // namespace caffe2
diff --git a/caffe2/ideep/utils/ideep_context.h b/caffe2/ideep/utils/ideep_context.h
index b5c702ea3d..11bf97ee44 100644
--- a/caffe2/ideep/utils/ideep_context.h
+++ b/caffe2/ideep/utils/ideep_context.h
@@ -56,7 +56,7 @@ class IDEEPContext final : public BaseContext {
   }
 
   inline static at::DataPtr New(size_t nbytes) {
-    return StaticContext()->New(nbytes);
+    return GetAllocator(CPU)->allocate(nbytes);
   }
 
   void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override {
@@ -176,10 +176,6 @@ inline void IDEEPContext::CopyBytes<IDEEPContext, CPUContext>(
 
 class IDEEPStaticContext : public BaseStaticContext {
  public:
-  inline at::DataPtr New(size_t nbytes) const override {
-    return GetCPUAllocator()->allocate(nbytes);
-  }
-
   DeviceType GetDeviceType() override {
     return IDEEP;
   }
diff --git a/caffe2/mkl/utils/mkl_context.h b/caffe2/mkl/utils/mkl_context.h
index 7735283b6e..a010b04de3 100644
--- a/caffe2/mkl/utils/mkl_context.h
+++ b/caffe2/mkl/utils/mkl_context.h
@@ -63,7 +63,7 @@ class MKLContext : public BaseContext {
   }
 
   inline static at::DataPtr New(size_t nbytes) {
-    return StaticContext()->New(nbytes);
+    return GetAllocator(CPU)->allocate(nbytes);
   }
 
   void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override {
@@ -153,10 +153,6 @@ inline void MKLContext::CopyBytes<MKLContext, MKLContext>(
 
 class MKLStaticContext : public BaseStaticContext {
  public:
-  inline at::DataPtr New(size_t nbytes) const override {
-    return GetCPUAllocator()->allocate(nbytes);
-  }
-
   DeviceType GetDeviceType() override {
     return MKLDNN;
   }
author	Jerry Zhang <jerryzh@fb.com>	2018-10-09 10:47:24 -0700
committer	Facebook Github Bot <facebook-github-bot@users.noreply.github.com>	2018-10-09 10:53:52 -0700
commit	1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f (patch)
tree	ff1ba37b7ca985c59274faef79e979d3bb83d4a6
parent	f564163951b15a14f777f01f0caf1b8ce64d3f00 (diff)
download	pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.tar.gz pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.tar.bz2 pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.zip