diff options
author | Jerry Zhang <jerryzh@fb.com> | 2018-07-26 10:11:53 -0700 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-07-26 10:25:23 -0700 |
commit | cd5adc7b5f7a5d4415a936c5ceca219f14dbb319 (patch) | |
tree | 5e0ab493963fe277aeb8b9cb41db255d3bc6025a /caffe2/sgd | |
parent | 2c7e7e37a60a294e1a0583b7d92bff6b1e61cf55 (diff) | |
download | pytorch-cd5adc7b5f7a5d4415a936c5ceca219f14dbb319.tar.gz pytorch-cd5adc7b5f7a5d4415a936c5ceca219f14dbb319.tar.bz2 pytorch-cd5adc7b5f7a5d4415a936c5ceca219f14dbb319.zip |
Remove template parameter from Tensor (#13)
Summary:
Pull Request resolved: https://github.com/facebookresearch/weakly-supervised-action-detection/pull/13
Pull Request resolved: https://github.com/pytorch/translate/pull/166
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9125
Closes https://github.com/pytorch/pytorch/pull/9125
Use inheritance for polymorphism, and remove template parameter
This is to change the templating in call sites, the core implementations will change later
Before this change, the Caffe2 Tensor class was fixed at compile time to bind to a particular device/context. With this change, we're making it a runtime property (stored inside the tensor), but preserving the same semantics. For example, one has to specify a device type in order to create a Tensor - there are no uninitialized tensors. More specifically, the changes are:
1. We added an extra argument *DeviceType* to most of the constructors of Tensor, e.g. (Tensor(DeviceType type)).
2. The semantics of the constructor Tensor(const Tensor<SrcContext>& src, ContextForCopy* context); have changed. In this constructor, the second context is passed in to enable us to call the templated Copy function. Previously it could be a different context than the source and target; now we enforce that the context, if provided, has the same device type as src.
3. To preserve the 'get-or-construct' semantics of Blob, we added a specialized getter, Blob::GetMutableTensor, that verifies both that the Blob contains a Tensor and that it is of the correct type.
4. Specifically, the Tensor type is no longer default-constructible (as we don't have unknown-device tensors), and thus some of the code handling STL containers needs to change.
Note: Some changes are postponed just to keep this diff a bit smaller. Please see `TODO`s.
Reviewed By: xw285cornell
Differential Revision: D8121878
fbshipit-source-id: 4a5e9a677ba4ac82095df959851a054c81eccf81
Diffstat (limited to 'caffe2/sgd')
-rw-r--r-- | caffe2/sgd/adam_op.h | 8 | ||||
-rw-r--r-- | caffe2/sgd/adam_op_gpu.cu | 2 | ||||
-rw-r--r-- | caffe2/sgd/fp16_momentum_sgd_op.h | 5 | ||||
-rw-r--r-- | caffe2/sgd/fp32_momentum_sgd_op.h | 5 | ||||
-rw-r--r-- | caffe2/sgd/iter_op.h | 9 | ||||
-rw-r--r-- | caffe2/sgd/learning_rate_op.h | 4 | ||||
-rw-r--r-- | caffe2/sgd/momentum_sgd_op.h | 10 | ||||
-rw-r--r-- | caffe2/sgd/yellowfin_op.h | 32 |
8 files changed, 40 insertions, 35 deletions
diff --git a/caffe2/sgd/adam_op.h b/caffe2/sgd/adam_op.h index bb30247ca5..c25509b3d0 100644 --- a/caffe2/sgd/adam_op.h +++ b/caffe2/sgd/adam_op.h @@ -88,7 +88,7 @@ class AdamOp final : public Operator<Context> { epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {} bool RunOnDevice() override { // Iter live on the CPU - CAFFE_ENFORCE(OperatorBase::InputIsType<TensorCPU>(ITER)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(ITER, CPU)); CAFFE_ENFORCE(Input(LR).size() == 1); CAFFE_ENFORCE(Input(GRAD).size() == Input(PARAM).size()); CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENT_1).size()); @@ -98,7 +98,7 @@ class AdamOp final : public Operator<Context> { Output(OUTPUT_MOMENT_2)->ResizeLike(Input(MOMENT_2)); const auto iter = - OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0]; + OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0]; const auto t = iter + 1; const auto correction = @@ -177,7 +177,7 @@ class SparseAdamOp final : public Operator<Context> { bool DoRunWithType() { const auto* lr = Input(LR).template data<T>(); const auto iter = - OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0]; + OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0]; const auto t = iter + 1; const auto correction = @@ -287,7 +287,7 @@ class RowWiseSparseAdamOp final : public Operator<Context> { bool DoRunWithType() { const auto* lr = Input(LR).template data<T>(); const auto iter = - OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0]; + OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0]; const auto t = iter + 1; const auto correction = diff --git a/caffe2/sgd/adam_op_gpu.cu b/caffe2/sgd/adam_op_gpu.cu index 8eb1b8835c..8fdde749a4 100644 --- a/caffe2/sgd/adam_op_gpu.cu +++ b/caffe2/sgd/adam_op_gpu.cu @@ -129,7 +129,7 @@ bool SparseAdamOp<float, CUDAContext>::DoRunWithType() { auto N = Input(GRAD).size(); auto grad_slice_sz = Input(GRAD).size_from_dim(Input(INDICES).ndim()); const 
auto iter = - OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0]; + OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0]; const float correction = sqrtf(1.0f - std::pow(beta2_, iter + 1)) / (1.0f - std::pow(beta1_, iter + 1)); diff --git a/caffe2/sgd/fp16_momentum_sgd_op.h b/caffe2/sgd/fp16_momentum_sgd_op.h index 85a9d53396..556b8a21f0 100644 --- a/caffe2/sgd/fp16_momentum_sgd_op.h +++ b/caffe2/sgd/fp16_momentum_sgd_op.h @@ -35,9 +35,10 @@ class FP16MomentumSGDUpdateOp final : public Operator<Context> { fp32_update_(OperatorBase::GetSingleArgument<int>("fp32_update", 0)) {} bool RunOnDevice() override { + auto device_type = Context::GetDeviceType(); // Iter live on the CPU - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD)); - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type)); CAFFE_ENFORCE(Input(LR).size() == 1); CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENTUM).size()); Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD)); diff --git a/caffe2/sgd/fp32_momentum_sgd_op.h b/caffe2/sgd/fp32_momentum_sgd_op.h index 25ca516eee..d94de7b7ac 100644 --- a/caffe2/sgd/fp32_momentum_sgd_op.h +++ b/caffe2/sgd/fp32_momentum_sgd_op.h @@ -31,9 +31,10 @@ class FP32MomentumSGDUpdateOp final : public Operator<Context> { nesterov_(OperatorBase::GetSingleArgument<int>("nesterov", 0)) {} bool RunOnDevice() override { + auto device_type = Context::GetDeviceType(); // Iter live on the CPU - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD)); - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type)); CAFFE_ENFORCE(Input(LR).size() == 1); CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENTUM).size()); 
Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD)); diff --git a/caffe2/sgd/iter_op.h b/caffe2/sgd/iter_op.h index 13681d7db3..91709f47f3 100644 --- a/caffe2/sgd/iter_op.h +++ b/caffe2/sgd/iter_op.h @@ -38,19 +38,20 @@ class IterOp final : public Operator<Context> { bool RunOnDevice() override { if (InputSize() == 0) { - if (!OperatorBase::OutputIsType<TensorCPU>(0)) { + LOG(INFO) << "[Input size is zero]"; + if (!OperatorBase::OutputIsType<Tensor>(0, CPU)) { // This is the first run; set the iter to start with 0. LOG(ERROR) << "You are using an old definition of IterOp that will " "be deprecated soon. More specifically, IterOp now " "requires an explicit in-place input and output."; - auto* output = OperatorBase::Output<TensorCPU>(0); + auto* output = OperatorBase::Output<Tensor>(0, CPU); VLOG(1) << "Initializing iter counter."; output->Resize(1); output->template mutable_data<int64_t>()[0] = 0; } } - IncrementIter(OperatorBase::Output<TensorCPU>(0)); + IncrementIter(OperatorBase::Output<Tensor>(0, CPU)); return true; } }; @@ -67,7 +68,7 @@ class AtomicIterOp final : public Operator<Context> { bool RunOnDevice() override { auto& mutex = OperatorBase::Input<std::unique_ptr<std::mutex>>(0); std::lock_guard<std::mutex> lg(*mutex); - IncrementIter(OperatorBase::Output<TensorCPU>(0)); + IncrementIter(OperatorBase::Output<Tensor>(0, CPU)); CAFFE_EVENT(stats_, num_iter); return true; } diff --git a/caffe2/sgd/learning_rate_op.h b/caffe2/sgd/learning_rate_op.h index 0a47b6c5fd..bd813ce653 100644 --- a/caffe2/sgd/learning_rate_op.h +++ b/caffe2/sgd/learning_rate_op.h @@ -27,12 +27,12 @@ class LearningRateOp final : public Operator<Context> { bool RunOnDevice() override { int64_t iter = - OperatorBase::Input<TensorCPU>(0).template data<int64_t>()[0]; + OperatorBase::Input<Tensor>(0, CPU).template data<int64_t>()[0]; T learning_rate = cur_base_lr_ * (*functor_)(iter); // Write to output. 
auto* output = Output(0); output->Resize(vector<TIndex>()); - context_.template Copy<T, CPUContext, Context>( + context_.template CopyFromCPU<T>( 1, &learning_rate, Output(0)->template mutable_data<T>()); return true; } diff --git a/caffe2/sgd/momentum_sgd_op.h b/caffe2/sgd/momentum_sgd_op.h index 23da3d420c..f3f75f6421 100644 --- a/caffe2/sgd/momentum_sgd_op.h +++ b/caffe2/sgd/momentum_sgd_op.h @@ -45,9 +45,10 @@ class MomentumSGDOp final : public Operator<Context> { nesterov_(OperatorBase::GetSingleArgument<int>("nesterov", 0)) {} bool RunOnDevice() override { + auto device_type = Context::GetDeviceType(); // Iter live on the CPU - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD)); - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type)); CAFFE_ENFORCE(Input(LR).size() == 1); CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENTUM).size()); Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD)); @@ -84,9 +85,10 @@ class MomentumSGDUpdateOp final : public Operator<Context> { nesterov_(OperatorBase::GetSingleArgument<int>("nesterov", 0)) {} bool RunOnDevice() override { + auto device_type = Context::GetDeviceType(); // Iter live on the CPU - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD)); - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type)); CAFFE_ENFORCE_EQ(Input(LR).size(), 1); CAFFE_ENFORCE_EQ(Input(GRAD).size(), Input(MOMENTUM).size()); Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD)); diff --git a/caffe2/sgd/yellowfin_op.h b/caffe2/sgd/yellowfin_op.h index 02403ea536..06ecc177c8 100644 --- a/caffe2/sgd/yellowfin_op.h +++ b/caffe2/sgd/yellowfin_op.h @@ -126,21 +126,21 @@ CAFFE2_YF_READ_INPUT(SCALARS_MEMORY, scalars_memory) 
CAFFE2_YF_READ_INPUT(GRAD, grad) #undef CAFFE2_YF_READ_OUTPUT - CAFFE_ENFORCE(OperatorBase::InputIsType<TensorCPU>(ITER)); - CAFFE_ENFORCE_EQ(lr_avg_tensor.size(), 1); - CAFFE_ENFORCE_EQ(mu_avg_tensor.size(), 1); - CAFFE_ENFORCE_EQ(param_tensor.ndim(), moment_tensor.ndim()); - CAFFE_ENFORCE_EQ(param_tensor.ndim(), g_avg_tensor.ndim()); - CAFFE_ENFORCE_EQ(param_tensor.ndim(), g2_avg_tensor.ndim()); - CAFFE_ENFORCE_EQ(param_tensor.ndim(), grad_tensor.ndim()); - for (int i = 0; i < param_tensor.ndim(); ++i) { - CAFFE_ENFORCE_EQ(param_tensor.dim32(i), moment_tensor.dim32(i)); - CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g_avg_tensor.dim32(i)); - CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g2_avg_tensor.dim32(i)); - CAFFE_ENFORCE_EQ(param_tensor.dim32(i), grad_tensor.dim32(i)); +CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(ITER, CPU)); +CAFFE_ENFORCE_EQ(lr_avg_tensor.size(), 1); +CAFFE_ENFORCE_EQ(mu_avg_tensor.size(), 1); +CAFFE_ENFORCE_EQ(param_tensor.ndim(), moment_tensor.ndim()); +CAFFE_ENFORCE_EQ(param_tensor.ndim(), g_avg_tensor.ndim()); +CAFFE_ENFORCE_EQ(param_tensor.ndim(), g2_avg_tensor.ndim()); +CAFFE_ENFORCE_EQ(param_tensor.ndim(), grad_tensor.ndim()); +for (int i = 0; i < param_tensor.ndim(); ++i) { + CAFFE_ENFORCE_EQ(param_tensor.dim32(i), moment_tensor.dim32(i)); + CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g_avg_tensor.dim32(i)); + CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g2_avg_tensor.dim32(i)); + CAFFE_ENFORCE_EQ(param_tensor.dim32(i), grad_tensor.dim32(i)); } - iter_ = OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0]; + iter_ = OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0]; D_ = param_tensor.size(); @@ -229,8 +229,8 @@ CAFFE2_YF_READ_INPUT(GRAD, grad) int D_; // Temporary memory on device, listed all variables used in calculations -#define CAFFE2_YF_DEFINE_TENSOR(NAME) \ - Tensor<Context> NAME##_tensor_; \ +#define CAFFE2_YF_DEFINE_TENSOR(NAME) \ + Tensor NAME##_tensor_{Context::GetDeviceType()}; \ T* NAME##_; 
CAFFE2_YF_DEFINE_TENSOR(aux_vector) @@ -255,7 +255,7 @@ CAFFE2_YF_READ_INPUT(GRAD, grad) CAFFE2_YF_DEFINE_TENSOR(mu_deb) CAFFE2_YF_DEFINE_TENSOR(variance) - Tensor<Context> scratch_tensor_; + Tensor scratch_tensor_{Context::GetDeviceType()}; #undef CAFFE2_YF_DEFINE_TENSOR |