summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Zhang <jerryzh@fb.com>2018-10-09 10:47:24 -0700
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>2018-10-09 10:53:52 -0700
commit1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f (patch)
treeff1ba37b7ca985c59274faef79e979d3bb83d4a6
parentf564163951b15a14f777f01f0caf1b8ce64d3f00 (diff)
downloadpytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.tar.gz
pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.tar.bz2
pytorch-1c69d368e1bc810c49939ac9fb4eb1f5e20bca3f.zip
Remove New with Allocator Registry (#12111)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/12111 Setup allocator registry keyed by at::DeviceType, and remove New from StaticContext. Reviewed By: ezyang Differential Revision: D10022853 fbshipit-source-id: 3e88a181fe5df24f33f49b88be1f75284a185588
-rw-r--r--aten/src/ATen/core/Allocator.cpp17
-rw-r--r--aten/src/ATen/core/Allocator.h28
-rw-r--r--aten/src/ATen/core/TensorImpl.h7
-rw-r--r--aten/src/ATen/core/context_base.h4
-rw-r--r--caffe2/core/allocator.cc12
-rw-r--r--caffe2/core/context.h5
-rw-r--r--caffe2/core/context_gpu.cu17
-rw-r--r--caffe2/core/context_gpu.h7
-rw-r--r--caffe2/core/hip/context_hip.cc15
-rw-r--r--caffe2/core/hip/context_hip.h7
-rw-r--r--caffe2/ideep/utils/ideep_context.h6
-rw-r--r--caffe2/mkl/utils/mkl_context.h6
12 files changed, 75 insertions, 56 deletions
diff --git a/aten/src/ATen/core/Allocator.cpp b/aten/src/ATen/core/Allocator.cpp
index d6c07cecc8..3a958cc277 100644
--- a/aten/src/ATen/core/Allocator.cpp
+++ b/aten/src/ATen/core/Allocator.cpp
@@ -17,3 +17,20 @@ at::DataPtr InefficientStdFunctionContext::makeDataPtr(
}
} // namespace at
+
+namespace caffe2 {
+
+CAFFE2_API at::Allocator* allocator_array[static_cast<int>(
+ at::DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)];
+
+void SetAllocator(at::DeviceType t, at::Allocator* alloc) {
+ allocator_array[static_cast<int>(t)] = alloc;
+}
+
+at::Allocator* GetAllocator(const at::DeviceType& t) {
+ auto* alloc = allocator_array[static_cast<int>(t)];
+ AT_ASSERTM(alloc, "Allocator for ", t, " is not set.");
+ return alloc;
+}
+
+} // namespace caffe2
diff --git a/aten/src/ATen/core/Allocator.h b/aten/src/ATen/core/Allocator.h
index c8c8a236fb..239698901c 100644
--- a/aten/src/ATen/core/Allocator.h
+++ b/aten/src/ATen/core/Allocator.h
@@ -133,3 +133,31 @@ struct CAFFE2_API InefficientStdFunctionContext {
};
} // namespace at
+
+namespace caffe2 {
+
+/** Set the allocator for DeviceType `t`. The passed in allocator pointer is
+ * expected to have static lifetime; this function does NOT take ownership
+ * of the raw pointer. (The reason for this is to prevent existing pointers
+ * to an allocator of a particular device from being invalidated when
+ * SetAllocator is called.)
+ *
+ * Also note that this is not thread-safe, and we assume this function will
+ * only be called during initialization.
+ */
+CAFFE2_API void SetAllocator(at::DeviceType t, at::Allocator* alloc);
+CAFFE2_API at::Allocator* GetAllocator(const at::DeviceType& t);
+
+template <at::DeviceType t>
+struct AllocatorRegisterer {
+ explicit AllocatorRegisterer(at::Allocator* alloc) {
+ SetAllocator(t, alloc);
+ }
+};
+
+#define CAFFE2_ALLOCATOR_CONCAT_IMPL(a, b) a##b /* pastes two already-expanded tokens */
+#define CAFFE2_ALLOCATOR_CONCAT(a, b) CAFFE2_ALLOCATOR_CONCAT_IMPL(a, b) /* extra level so __LINE__ expands before ## */
+#define REGISTER_ALLOCATOR(t, f) /* defines a file-local AllocatorRegisterer<t> whose constructor calls SetAllocator(t, f) */ \
+ namespace { static AllocatorRegisterer<t> CAFFE2_ALLOCATOR_CONCAT(g_allocator_, __LINE__)(f); } /* per-line name: the old `##d` named no parameter, so every use declared g_allocator_d */
+
+} // namespace caffe2
diff --git a/aten/src/ATen/core/TensorImpl.h b/aten/src/ATen/core/TensorImpl.h
index fa409a1e4c..54e8b5ba5d 100644
--- a/aten/src/ATen/core/TensorImpl.h
+++ b/aten/src/ATen/core/TensorImpl.h
@@ -11,7 +11,7 @@
#include <ATen/core/context_base.h>
#include <ATen/core/optional.h>
-#include "c10/util/Flags.h"
+#include <c10/util/Flags.h>
#include "caffe2/core/allocator.h"
#include "caffe2/core/common.h"
@@ -765,13 +765,14 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
CAFFE_ENFORCE(
allocator == nullptr,
"Allocator is not used within Caffe2 functions, please use StaticContext instead.");
+ allocator = caffe2::GetAllocator(storage_.device_type());
if (meta.ctor()) {
// For types that need placement new, we will call it, as well as
// making sure that when the data is freed, it calls the right
// destruction procedure.
auto size = numel_;
auto dtor = data_type_.dtor();
- auto data_ptr = GetStaticContext()->New(
+ auto data_ptr = allocator->allocate(
numel_ * storage_.itemsize()); // Removing this can get rid of
// InefficientStdFunctionContext
storage_.set_data_ptr(PlacementDeleteContext::makeDataPtr(
@@ -783,7 +784,7 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
} else {
// For fundamental type, new and delete is easier.
storage_.set_data_ptr(
- GetStaticContext()->New(numel_ * storage_.itemsize()));
+ allocator->allocate(numel_ * storage_.itemsize()));
}
storage_.set_numel(numel_);
AT_ASSERT(storage_offset_ == 0); // because we just reallocated
diff --git a/aten/src/ATen/core/context_base.h b/aten/src/ATen/core/context_base.h
index 1aeec94ab1..7d25ebea88 100644
--- a/aten/src/ATen/core/context_base.h
+++ b/aten/src/ATen/core/context_base.h
@@ -8,9 +8,7 @@
#include <ATen/core/ATenGeneral.h>
#include <ATen/core/Allocator.h>
-#include <ATen/core/Device.h>
#include <ATen/core/Error.h>
-#include <ATen/core/UniqueVoidPtr.h>
#include <ATen/core/typeid.h>
#include <c10/util/Registry.h>
@@ -31,8 +29,6 @@ class CAFFE2_API BaseStaticContext {
public:
virtual ~BaseStaticContext() noexcept {}
- virtual at::DataPtr New(size_t nbytes) const = 0;
-
virtual DeviceType GetDeviceType() = 0;
/*
diff --git a/caffe2/core/allocator.cc b/caffe2/core/allocator.cc
index 4155daefe8..91c3bdfd3d 100644
--- a/caffe2/core/allocator.cc
+++ b/caffe2/core/allocator.cc
@@ -1,3 +1,4 @@
+#include <ATen/core/Allocator.h>
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/typeid.h"
@@ -16,16 +17,19 @@ namespace caffe2 {
void NoDelete(void*) {}
-static std::unique_ptr<at::Allocator> g_cpu_allocator(
- new DefaultCPUAllocator());
at::Allocator* GetCPUAllocator() {
- return g_cpu_allocator.get();
+ return GetAllocator(CPU);
}
void SetCPUAllocator(at::Allocator* alloc) {
- g_cpu_allocator.reset(alloc);
+ SetAllocator(CPU, alloc);
}
+// Global default CPU Allocator
+static DefaultCPUAllocator g_cpu_alloc;
+
+REGISTER_ALLOCATOR(CPU, &g_cpu_alloc);
+
MemoryAllocationReporter DefaultCPUAllocator::reporter_;
void MemoryAllocationReporter::New(void* ptr, size_t nbytes) {
diff --git a/caffe2/core/context.h b/caffe2/core/context.h
index c81059b55e..2f8724b799 100644
--- a/caffe2/core/context.h
+++ b/caffe2/core/context.h
@@ -86,7 +86,7 @@ class CAFFE2_API CPUContext final : public BaseContext {
}
inline static at::DataPtr New(size_t nbytes) {
- return StaticContext()->New(nbytes);
+ return GetCPUAllocator()->allocate(nbytes);
}
void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override {
@@ -185,9 +185,6 @@ inline void CPUContext::CopyBytes<CPUContext, CPUContext>(
// TODO(jerryzh): merge CPUStaticContext with Allocator
class CAFFE2_API CPUStaticContext : public BaseStaticContext {
public:
- at::DataPtr New(size_t nbytes) const override {
- return GetCPUAllocator()->allocate(nbytes);
- }
DeviceType GetDeviceType() override {
return CPU;
diff --git a/caffe2/core/context_gpu.cu b/caffe2/core/context_gpu.cu
index a6409d9933..e846560c49 100644
--- a/caffe2/core/context_gpu.cu
+++ b/caffe2/core/context_gpu.cu
@@ -209,6 +209,8 @@ static void Caffe2SetCUDAMemoryPool() {
}
}
+static PinnedCPUAllocator g_pinned_cpu_alloc;
+
// An initialization function that sets the CPU side to use pinned cpu
// allocator.
void Caffe2UsePinnedCPUAllocator() {
@@ -226,7 +228,7 @@ void Caffe2UsePinnedCPUAllocator() {
return;
}
VLOG(1) << "Caffe2 gpu: setting CPUAllocator to PinnedCPUAllocator.";
- SetCPUAllocator(new PinnedCPUAllocator());
+ SetCPUAllocator(&g_pinned_cpu_alloc);
#endif
}
@@ -323,11 +325,6 @@ void TrackMemoryAlloc(size_t nbytes) {
}
}
-// TODO: wrap this function in DefaultCUDAAllocator
-at::DataPtr CUDAStaticContext::New(size_t nbytes) const {
- return GetCUDAAllocator()->allocate(nbytes);
-}
-
struct DefaultCUDAAllocator final : public at::Allocator {
DefaultCUDAAllocator() {}
~DefaultCUDAAllocator() override {}
@@ -427,12 +424,14 @@ struct DefaultCUDAAllocator final : public at::Allocator {
}
};
-static std::unique_ptr<at::Allocator> g_cuda_allocator(
- new DefaultCUDAAllocator());
at::Allocator* GetCUDAAllocator() {
- return g_cuda_allocator.get();
+ return GetAllocator(CUDA);
}
+static DefaultCUDAAllocator g_cuda_alloc;
+
+REGISTER_ALLOCATOR(CUDA, &g_cuda_alloc);
+
BaseStaticContext* GetCUDAStaticContext() {
static CUDAStaticContext context;
return &context;
diff --git a/caffe2/core/context_gpu.h b/caffe2/core/context_gpu.h
index 79997941f9..9882d41392 100644
--- a/caffe2/core/context_gpu.h
+++ b/caffe2/core/context_gpu.h
@@ -224,7 +224,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext {
}
inline static at::DataPtr New(size_t nbytes) {
- return StaticContext()->New(nbytes);
+ return GetAllocator(CUDA)->allocate(nbytes);
}
// Get a mutex to lock out cudaMalloc / cudaFree calls when
@@ -387,8 +387,6 @@ struct CAFFE2_CUDA_API PinnedCPUAllocator final : public at::Allocator {
class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext {
public:
- at::DataPtr New(size_t nbytes) const override;
-
DeviceType GetDeviceType() override {
return CUDA;
}
@@ -401,9 +399,6 @@ class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext {
};
-// Get the CUDA Alloctor.
-CAFFE2_API at::Allocator* GetCUDAAllocator();
-
using TensorCUDA = Tensor;
} // namespace caffe2
diff --git a/caffe2/core/hip/context_hip.cc b/caffe2/core/hip/context_hip.cc
index 3ee4ce6035..7e5e49b4a3 100644
--- a/caffe2/core/hip/context_hip.cc
+++ b/caffe2/core/hip/context_hip.cc
@@ -203,6 +203,8 @@ static void Caffe2SetHIPMemoryPool()
}
}
+static PinnedCPUAllocator g_pinned_cpu_alloc;
+
// An initialization function that sets the CPU side to use pinned cpu
// allocator.
void Caffe2UsePinnedCPUAllocator()
@@ -222,7 +224,7 @@ void Caffe2UsePinnedCPUAllocator()
return;
}
VLOG(1) << "Caffe2 gpu: setting CPUAllocator to PinnedCPUAllocator.";
- SetCPUAllocator(new PinnedCPUAllocator());
+ SetCPUAllocator(&g_pinned_cpu_alloc);
#endif
}
@@ -326,10 +328,6 @@ void TrackMemoryAlloc(size_t nbytes)
}
}
-at::DataPtr HIPStaticContext::New(size_t nbytes) const {
- return GetHIPAllocator()->allocate(nbytes);
-}
-
struct DefaultHIPAllocator final : public at::Allocator {
DefaultHIPAllocator() {}
~DefaultHIPAllocator() override {}
@@ -429,11 +427,8 @@ struct DefaultHIPAllocator final : public at::Allocator {
}
};
-static std::unique_ptr<at::Allocator> g_hip_allocator(
- new DefaultHIPAllocator());
-at::Allocator* GetHIPAllocator() {
- return g_hip_allocator.get();
-}
+static DefaultHIPAllocator g_hip_alloc;
+REGISTER_ALLOCATOR(HIP, &g_hip_alloc);
BaseStaticContext* GetHIPStaticContext() {
static HIPStaticContext context;
diff --git a/caffe2/core/hip/context_hip.h b/caffe2/core/hip/context_hip.h
index 2eb01f2bce..b2fa347144 100644
--- a/caffe2/core/hip/context_hip.h
+++ b/caffe2/core/hip/context_hip.h
@@ -207,7 +207,7 @@ class HIPContext final : public BaseContext {
}
static at::DataPtr New(size_t nbytes) {
- return StaticContext()->New(nbytes);
+ return GetAllocator(HIP)->allocate(nbytes);
}
// Get a mutex to lock out hipMalloc / hipFree calls when
@@ -376,8 +376,6 @@ struct PinnedCPUAllocator final : public at::Allocator {
class HIPStaticContext final : public BaseStaticContext {
public:
- at::DataPtr New(size_t nbytes) const override;
-
DeviceType GetDeviceType() override {
return HIP;
}
@@ -389,9 +387,6 @@ class HIPStaticContext final : public BaseStaticContext {
};
-// Get the HIP Alloctor.
-CAFFE2_API at::Allocator* GetHIPAllocator();
-
typedef Tensor TensorHIP;
} // namespace caffe2
diff --git a/caffe2/ideep/utils/ideep_context.h b/caffe2/ideep/utils/ideep_context.h
index b5c702ea3d..11bf97ee44 100644
--- a/caffe2/ideep/utils/ideep_context.h
+++ b/caffe2/ideep/utils/ideep_context.h
@@ -56,7 +56,7 @@ class IDEEPContext final : public BaseContext {
}
inline static at::DataPtr New(size_t nbytes) {
- return StaticContext()->New(nbytes);
+ return GetAllocator(CPU)->allocate(nbytes);
}
void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override {
@@ -176,10 +176,6 @@ inline void IDEEPContext::CopyBytes<IDEEPContext, CPUContext>(
class IDEEPStaticContext : public BaseStaticContext {
public:
- inline at::DataPtr New(size_t nbytes) const override {
- return GetCPUAllocator()->allocate(nbytes);
- }
-
DeviceType GetDeviceType() override {
return IDEEP;
}
diff --git a/caffe2/mkl/utils/mkl_context.h b/caffe2/mkl/utils/mkl_context.h
index 7735283b6e..a010b04de3 100644
--- a/caffe2/mkl/utils/mkl_context.h
+++ b/caffe2/mkl/utils/mkl_context.h
@@ -63,7 +63,7 @@ class MKLContext : public BaseContext {
}
inline static at::DataPtr New(size_t nbytes) {
- return StaticContext()->New(nbytes);
+ return GetAllocator(CPU)->allocate(nbytes);
}
void CopyBytesSameDevice(size_t nbytes, const void* src, void* dst) override {
@@ -153,10 +153,6 @@ inline void MKLContext::CopyBytes<MKLContext, MKLContext>(
class MKLStaticContext : public BaseStaticContext {
public:
- inline at::DataPtr New(size_t nbytes) const override {
- return GetCPUAllocator()->allocate(nbytes);
- }
-
DeviceType GetDeviceType() override {
return MKLDNN;
}