diff options
author | Jerry Zhang <jerryzh@fb.com> | 2018-07-26 10:11:53 -0700 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2018-07-26 10:25:23 -0700 |
commit | cd5adc7b5f7a5d4415a936c5ceca219f14dbb319 (patch) | |
tree | 5e0ab493963fe277aeb8b9cb41db255d3bc6025a /caffe2/sgd | |
parent | 2c7e7e37a60a294e1a0583b7d92bff6b1e61cf55 (diff) | |
download | pytorch-cd5adc7b5f7a5d4415a936c5ceca219f14dbb319.tar.gz pytorch-cd5adc7b5f7a5d4415a936c5ceca219f14dbb319.tar.bz2 pytorch-cd5adc7b5f7a5d4415a936c5ceca219f14dbb319.zip |
Remove template parameter from Tensor (#13)
Summary:
Pull Request resolved: https://github.com/facebookresearch/weakly-supervised-action-detection/pull/13
Pull Request resolved: https://github.com/pytorch/translate/pull/166
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9125
Closes https://github.com/pytorch/pytorch/pull/9125
Use inheritance for polymorphism, and remove template parameter
This is to change the templating in call sites, the core implementations will change later
Before this change, the Caffe2 Tensor class was fixed at compile time to bind to a particular device/context. With this change, we're making it a runtime property (stored inside the tensor), but preserving the same semantics. For example, one has to specify a device type in order to create a Tensor - there are no uninitialized tensors. More specifically, the changes are:
1. We added an extra argument *DeviceType* to most of the constructors of Tensor, e.g. (Tensor(DeviceType type)).
2. The semantics of the constructor Tensor(const Tensor<SrcContext>& src, ContextForCopy* context); have changed. In this constructor, the second context is passed in to enable us to call the templated Copy function. Previously it could be a different context than the source and target; now we enforce that the context, if provided, has the same device type as src.
3. To preserve the 'get-or-construct' semantics of Blob, we added a specialized getter, Blob::GetMutableTensor, that verifies both that the Blob contains a Tensor and that it is of the correct type.
4. Specifically, the Tensor type is no longer default-constructible (as we don't have unknown-device tensors), and thus some of the code handling STL containers needs to change.
Note: Some changes are postponed just to keep this diff a bit smaller. Please see `TODO`s.
Reviewed By: xw285cornell
Differential Revision: D8121878
fbshipit-source-id: 4a5e9a677ba4ac82095df959851a054c81eccf81
Diffstat (limited to 'caffe2/sgd')
-rw-r--r-- | caffe2/sgd/adam_op.h | 8 | ||||
-rw-r--r-- | caffe2/sgd/adam_op_gpu.cu | 2 | ||||
-rw-r--r-- | caffe2/sgd/fp16_momentum_sgd_op.h | 5 | ||||
-rw-r--r-- | caffe2/sgd/fp32_momentum_sgd_op.h | 5 | ||||
-rw-r--r-- | caffe2/sgd/iter_op.h | 9 | ||||
-rw-r--r-- | caffe2/sgd/learning_rate_op.h | 4 | ||||
-rw-r--r-- | caffe2/sgd/momentum_sgd_op.h | 10 | ||||
-rw-r--r-- | caffe2/sgd/yellowfin_op.h | 32 |
8 files changed, 40 insertions, 35 deletions
diff --git a/caffe2/sgd/adam_op.h b/caffe2/sgd/adam_op.h index bb30247ca5..c25509b3d0 100644 --- a/caffe2/sgd/adam_op.h +++ b/caffe2/sgd/adam_op.h @@ -88,7 +88,7 @@ class AdamOp final : public Operator<Context> { epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {} bool RunOnDevice() override { // Iter live on the CPU - CAFFE_ENFORCE(OperatorBase::InputIsType<TensorCPU>(ITER)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(ITER, CPU)); CAFFE_ENFORCE(Input(LR).size() == 1); CAFFE_ENFORCE(Input(GRAD).size() == Input(PARAM).size()); CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENT_1).size()); @@ -98,7 +98,7 @@ class AdamOp final : public Operator<Context> { Output(OUTPUT_MOMENT_2)->ResizeLike(Input(MOMENT_2)); const auto iter = - OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0]; + OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0]; const auto t = iter + 1; const auto correction = @@ -177,7 +177,7 @@ class SparseAdamOp final : public Operator<Context> { bool DoRunWithType() { const auto* lr = Input(LR).template data<T>(); const auto iter = - OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0]; + OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0]; const auto t = iter + 1; const auto correction = @@ -287,7 +287,7 @@ class RowWiseSparseAdamOp final : public Operator<Context> { bool DoRunWithType() { const auto* lr = Input(LR).template data<T>(); const auto iter = - OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0]; + OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0]; const auto t = iter + 1; const auto correction = diff --git a/caffe2/sgd/adam_op_gpu.cu b/caffe2/sgd/adam_op_gpu.cu index 8eb1b8835c..8fdde749a4 100644 --- a/caffe2/sgd/adam_op_gpu.cu +++ b/caffe2/sgd/adam_op_gpu.cu @@ -129,7 +129,7 @@ bool SparseAdamOp<float, CUDAContext>::DoRunWithType() { auto N = Input(GRAD).size(); auto grad_slice_sz = Input(GRAD).size_from_dim(Input(INDICES).ndim()); const 
auto iter = - OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0]; + OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0]; const float correction = sqrtf(1.0f - std::pow(beta2_, iter + 1)) / (1.0f - std::pow(beta1_, iter + 1)); diff --git a/caffe2/sgd/fp16_momentum_sgd_op.h b/caffe2/sgd/fp16_momentum_sgd_op.h index 85a9d53396..556b8a21f0 100644 --- a/caffe2/sgd/fp16_momentum_sgd_op.h +++ b/caffe2/sgd/fp16_momentum_sgd_op.h @@ -35,9 +35,10 @@ class FP16MomentumSGDUpdateOp final : public Operator<Context> { fp32_update_(OperatorBase::GetSingleArgument<int>("fp32_update", 0)) {} bool RunOnDevice() override { + auto device_type = Context::GetDeviceType(); // Iter live on the CPU - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD)); - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type)); CAFFE_ENFORCE(Input(LR).size() == 1); CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENTUM).size()); Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD)); diff --git a/caffe2/sgd/fp32_momentum_sgd_op.h b/caffe2/sgd/fp32_momentum_sgd_op.h index 25ca516eee..d94de7b7ac 100644 --- a/caffe2/sgd/fp32_momentum_sgd_op.h +++ b/caffe2/sgd/fp32_momentum_sgd_op.h @@ -31,9 +31,10 @@ class FP32MomentumSGDUpdateOp final : public Operator<Context> { nesterov_(OperatorBase::GetSingleArgument<int>("nesterov", 0)) {} bool RunOnDevice() override { + auto device_type = Context::GetDeviceType(); // Iter live on the CPU - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD)); - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type)); CAFFE_ENFORCE(Input(LR).size() == 1); CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENTUM).size()); 
Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD)); diff --git a/caffe2/sgd/iter_op.h b/caffe2/sgd/iter_op.h index 13681d7db3..91709f47f3 100644 --- a/caffe2/sgd/iter_op.h +++ b/caffe2/sgd/iter_op.h @@ -38,19 +38,20 @@ class IterOp final : public Operator<Context> { bool RunOnDevice() override { if (InputSize() == 0) { - if (!OperatorBase::OutputIsType<TensorCPU>(0)) { + LOG(INFO) << "[Input size is zero]"; + if (!OperatorBase::OutputIsType<Tensor>(0, CPU)) { // This is the first run; set the iter to start with 0. LOG(ERROR) << "You are using an old definition of IterOp that will " "be deprecated soon. More specifically, IterOp now " "requires an explicit in-place input and output."; - auto* output = OperatorBase::Output<TensorCPU>(0); + auto* output = OperatorBase::Output<Tensor>(0, CPU); VLOG(1) << "Initializing iter counter."; output->Resize(1); output->template mutable_data<int64_t>()[0] = 0; } } - IncrementIter(OperatorBase::Output<TensorCPU>(0)); + IncrementIter(OperatorBase::Output<Tensor>(0, CPU)); return true; } }; @@ -67,7 +68,7 @@ class AtomicIterOp final : public Operator<Context> { bool RunOnDevice() override { auto& mutex = OperatorBase::Input<std::unique_ptr<std::mutex>>(0); std::lock_guard<std::mutex> lg(*mutex); - IncrementIter(OperatorBase::Output<TensorCPU>(0)); + IncrementIter(OperatorBase::Output<Tensor>(0, CPU)); CAFFE_EVENT(stats_, num_iter); return true; } diff --git a/caffe2/sgd/learning_rate_op.h b/caffe2/sgd/learning_rate_op.h index 0a47b6c5fd..bd813ce653 100644 --- a/caffe2/sgd/learning_rate_op.h +++ b/caffe2/sgd/learning_rate_op.h @@ -27,12 +27,12 @@ class LearningRateOp final : public Operator<Context> { bool RunOnDevice() override { int64_t iter = - OperatorBase::Input<TensorCPU>(0).template data<int64_t>()[0]; + OperatorBase::Input<Tensor>(0, CPU).template data<int64_t>()[0]; T learning_rate = cur_base_lr_ * (*functor_)(iter); // Write to output. 
auto* output = Output(0); output->Resize(vector<TIndex>()); - context_.template Copy<T, CPUContext, Context>( + context_.template CopyFromCPU<T>( 1, &learning_rate, Output(0)->template mutable_data<T>()); return true; } diff --git a/caffe2/sgd/momentum_sgd_op.h b/caffe2/sgd/momentum_sgd_op.h index 23da3d420c..f3f75f6421 100644 --- a/caffe2/sgd/momentum_sgd_op.h +++ b/caffe2/sgd/momentum_sgd_op.h @@ -45,9 +45,10 @@ class MomentumSGDOp final : public Operator<Context> { nesterov_(OperatorBase::GetSingleArgument<int>("nesterov", 0)) {} bool RunOnDevice() override { + auto device_type = Context::GetDeviceType(); // Iter live on the CPU - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD)); - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type)); CAFFE_ENFORCE(Input(LR).size() == 1); CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENTUM).size()); Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD)); @@ -84,9 +85,10 @@ class MomentumSGDUpdateOp final : public Operator<Context> { nesterov_(OperatorBase::GetSingleArgument<int>("nesterov", 0)) {} bool RunOnDevice() override { + auto device_type = Context::GetDeviceType(); // Iter live on the CPU - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD)); - CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type)); + CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type)); CAFFE_ENFORCE_EQ(Input(LR).size(), 1); CAFFE_ENFORCE_EQ(Input(GRAD).size(), Input(MOMENTUM).size()); Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD)); diff --git a/caffe2/sgd/yellowfin_op.h b/caffe2/sgd/yellowfin_op.h index 02403ea536..06ecc177c8 100644 --- a/caffe2/sgd/yellowfin_op.h +++ b/caffe2/sgd/yellowfin_op.h @@ -126,21 +126,21 @@ CAFFE2_YF_READ_INPUT(SCALARS_MEMORY, scalars_memory) 
CAFFE2_YF_READ_INPUT(GRAD, grad) #undef CAFFE2_YF_READ_OUTPUT - CAFFE_ENFORCE(OperatorBase::InputIsType<TensorCPU>(ITER)); - CAFFE_ENFORCE_EQ(lr_avg_tensor.size(), 1); - CAFFE_ENFORCE_EQ(mu_avg_tensor.size(), 1); - CAFFE_ENFORCE_EQ(param_tensor.ndim(), moment_tensor.ndim()); - CAFFE_ENFORCE_EQ(param_tensor.ndim(), g_avg_tensor.ndim()); - CAFFE_ENFORCE_EQ(param_tensor.ndim(), g2_avg_tensor.ndim()); - CAFFE_ENFORCE_EQ(param_tensor.ndim(), grad_tensor.ndim()); - for (int i = 0; i < param_tensor.ndim(); ++i) { - CAFFE_ENFORCE_EQ(param_tensor.dim32(i), moment_tensor.dim32(i)); - CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g_avg_tensor.dim32(i)); - CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g2_avg_tensor.dim32(i)); - CAFFE_ENFORCE_EQ(param_tensor.dim32(i), grad_tensor.dim32(i)); +CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(ITER, CPU)); +CAFFE_ENFORCE_EQ(lr_avg_tensor.size(), 1); +CAFFE_ENFORCE_EQ(mu_avg_tensor.size(), 1); +CAFFE_ENFORCE_EQ(param_tensor.ndim(), moment_tensor.ndim()); +CAFFE_ENFORCE_EQ(param_tensor.ndim(), g_avg_tensor.ndim()); +CAFFE_ENFORCE_EQ(param_tensor.ndim(), g2_avg_tensor.ndim()); +CAFFE_ENFORCE_EQ(param_tensor.ndim(), grad_tensor.ndim()); +for (int i = 0; i < param_tensor.ndim(); ++i) { + CAFFE_ENFORCE_EQ(param_tensor.dim32(i), moment_tensor.dim32(i)); + CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g_avg_tensor.dim32(i)); + CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g2_avg_tensor.dim32(i)); + CAFFE_ENFORCE_EQ(param_tensor.dim32(i), grad_tensor.dim32(i)); } - iter_ = OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0]; + iter_ = OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0]; D_ = param_tensor.size(); @@ -229,8 +229,8 @@ CAFFE2_YF_READ_INPUT(GRAD, grad) int D_; // Temporary memory on device, listed all variables used in calculations -#define CAFFE2_YF_DEFINE_TENSOR(NAME) \ - Tensor<Context> NAME##_tensor_; \ +#define CAFFE2_YF_DEFINE_TENSOR(NAME) \ + Tensor NAME##_tensor_{Context::GetDeviceType()}; \ T* NAME##_; 
CAFFE2_YF_DEFINE_TENSOR(aux_vector) @@ -255,7 +255,7 @@ CAFFE2_YF_READ_INPUT(GRAD, grad) CAFFE2_YF_DEFINE_TENSOR(mu_deb) CAFFE2_YF_DEFINE_TENSOR(variance) - Tensor<Context> scratch_tensor_; + Tensor scratch_tensor_{Context::GetDeviceType()}; #undef CAFFE2_YF_DEFINE_TENSOR |