summary | refs | log | tree | commit | diff
path: root/caffe2/sgd
diff options
context:
space:
mode:
author: Jerry Zhang <jerryzh@fb.com> 2018-07-26 10:11:53 -0700
committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2018-07-26 10:25:23 -0700
commit: cd5adc7b5f7a5d4415a936c5ceca219f14dbb319 (patch)
tree: 5e0ab493963fe277aeb8b9cb41db255d3bc6025a /caffe2/sgd
parent: 2c7e7e37a60a294e1a0583b7d92bff6b1e61cf55 (diff)
download: pytorch-cd5adc7b5f7a5d4415a936c5ceca219f14dbb319.tar.gz
pytorch-cd5adc7b5f7a5d4415a936c5ceca219f14dbb319.tar.bz2
pytorch-cd5adc7b5f7a5d4415a936c5ceca219f14dbb319.zip
Remove template parameter from Tensor (#13)
Summary: Pull Request resolved: https://github.com/facebookresearch/weakly-supervised-action-detection/pull/13 Pull Request resolved: https://github.com/pytorch/translate/pull/166 Pull Request resolved: https://github.com/pytorch/pytorch/pull/9125 Closes https://github.com/pytorch/pytorch/pull/9125 Use inheritance for polymorphism, and remove the template parameter. This changes the templating at call sites only; the core implementations will change later. Previously, the Caffe2 Tensor class was fixed at compile time to bind to a particular device/context. With this change, we're making the device a runtime property (stored inside the tensor) while preserving the same semantics. For example, one has to specify the device type in order to create a Tensor - there are no uninitialized tensors. More specifically, the changes are: 1. We added an extra argument *DeviceType* to most of the constructors of the tensor, e.g. Tensor(DeviceType type). 2. The semantics of the constructor Tensor(const Tensor<SrcContext>& src, ContextForCopy* context); have changed. In this constructor, the second argument (the context) is passed in to enable us to call the templated Copy function. Previously it could be a different context from the source and target; now we enforce that the context, if it is provided, has the same device type as src. 3. To preserve the 'get-or-construct' semantics of Blob, we added a specialized getter, Blob::GetMutableTensor, that verifies both that the Blob contains a Tensor and that it is of the correct type. 4. Specifically, the Tensor type is no longer default-constructible (as we don't have unknown-device tensors), and thus some of the code handling STL containers needs to change. Note: Some changes are postponed just to keep this diff a bit smaller. Please see the `TODO`s. Reviewed By: xw285cornell Differential Revision: D8121878 fbshipit-source-id: 4a5e9a677ba4ac82095df959851a054c81eccf81
Diffstat (limited to 'caffe2/sgd')
-rw-r--r-- caffe2/sgd/adam_op.h | 8
-rw-r--r-- caffe2/sgd/adam_op_gpu.cu | 2
-rw-r--r-- caffe2/sgd/fp16_momentum_sgd_op.h | 5
-rw-r--r-- caffe2/sgd/fp32_momentum_sgd_op.h | 5
-rw-r--r-- caffe2/sgd/iter_op.h | 9
-rw-r--r-- caffe2/sgd/learning_rate_op.h | 4
-rw-r--r-- caffe2/sgd/momentum_sgd_op.h | 10
-rw-r--r-- caffe2/sgd/yellowfin_op.h | 32
8 files changed, 40 insertions, 35 deletions
diff --git a/caffe2/sgd/adam_op.h b/caffe2/sgd/adam_op.h
index bb30247ca5..c25509b3d0 100644
--- a/caffe2/sgd/adam_op.h
+++ b/caffe2/sgd/adam_op.h
@@ -88,7 +88,7 @@ class AdamOp final : public Operator<Context> {
epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {}
bool RunOnDevice() override {
// Iter live on the CPU
- CAFFE_ENFORCE(OperatorBase::InputIsType<TensorCPU>(ITER));
+ CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(ITER, CPU));
CAFFE_ENFORCE(Input(LR).size() == 1);
CAFFE_ENFORCE(Input(GRAD).size() == Input(PARAM).size());
CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENT_1).size());
@@ -98,7 +98,7 @@ class AdamOp final : public Operator<Context> {
Output(OUTPUT_MOMENT_2)->ResizeLike(Input(MOMENT_2));
const auto iter =
- OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0];
+ OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0];
const auto t = iter + 1;
const auto correction =
@@ -177,7 +177,7 @@ class SparseAdamOp final : public Operator<Context> {
bool DoRunWithType() {
const auto* lr = Input(LR).template data<T>();
const auto iter =
- OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0];
+ OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0];
const auto t = iter + 1;
const auto correction =
@@ -287,7 +287,7 @@ class RowWiseSparseAdamOp final : public Operator<Context> {
bool DoRunWithType() {
const auto* lr = Input(LR).template data<T>();
const auto iter =
- OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0];
+ OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0];
const auto t = iter + 1;
const auto correction =
diff --git a/caffe2/sgd/adam_op_gpu.cu b/caffe2/sgd/adam_op_gpu.cu
index 8eb1b8835c..8fdde749a4 100644
--- a/caffe2/sgd/adam_op_gpu.cu
+++ b/caffe2/sgd/adam_op_gpu.cu
@@ -129,7 +129,7 @@ bool SparseAdamOp<float, CUDAContext>::DoRunWithType() {
auto N = Input(GRAD).size();
auto grad_slice_sz = Input(GRAD).size_from_dim(Input(INDICES).ndim());
const auto iter =
- OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0];
+ OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0];
const float correction = sqrtf(1.0f - std::pow(beta2_, iter + 1)) /
(1.0f - std::pow(beta1_, iter + 1));
diff --git a/caffe2/sgd/fp16_momentum_sgd_op.h b/caffe2/sgd/fp16_momentum_sgd_op.h
index 85a9d53396..556b8a21f0 100644
--- a/caffe2/sgd/fp16_momentum_sgd_op.h
+++ b/caffe2/sgd/fp16_momentum_sgd_op.h
@@ -35,9 +35,10 @@ class FP16MomentumSGDUpdateOp final : public Operator<Context> {
fp32_update_(OperatorBase::GetSingleArgument<int>("fp32_update", 0)) {}
bool RunOnDevice() override {
+ auto device_type = Context::GetDeviceType();
// Iter live on the CPU
- CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD));
- CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM));
+ CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type));
+ CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type));
CAFFE_ENFORCE(Input(LR).size() == 1);
CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENTUM).size());
Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD));
diff --git a/caffe2/sgd/fp32_momentum_sgd_op.h b/caffe2/sgd/fp32_momentum_sgd_op.h
index 25ca516eee..d94de7b7ac 100644
--- a/caffe2/sgd/fp32_momentum_sgd_op.h
+++ b/caffe2/sgd/fp32_momentum_sgd_op.h
@@ -31,9 +31,10 @@ class FP32MomentumSGDUpdateOp final : public Operator<Context> {
nesterov_(OperatorBase::GetSingleArgument<int>("nesterov", 0)) {}
bool RunOnDevice() override {
+ auto device_type = Context::GetDeviceType();
// Iter live on the CPU
- CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD));
- CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM));
+ CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type));
+ CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type));
CAFFE_ENFORCE(Input(LR).size() == 1);
CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENTUM).size());
Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD));
diff --git a/caffe2/sgd/iter_op.h b/caffe2/sgd/iter_op.h
index 13681d7db3..91709f47f3 100644
--- a/caffe2/sgd/iter_op.h
+++ b/caffe2/sgd/iter_op.h
@@ -38,19 +38,20 @@ class IterOp final : public Operator<Context> {
bool RunOnDevice() override {
if (InputSize() == 0) {
- if (!OperatorBase::OutputIsType<TensorCPU>(0)) {
+ LOG(INFO) << "[Input size is zero]";
+ if (!OperatorBase::OutputIsType<Tensor>(0, CPU)) {
// This is the first run; set the iter to start with 0.
LOG(ERROR) << "You are using an old definition of IterOp that will "
"be deprecated soon. More specifically, IterOp now "
"requires an explicit in-place input and output.";
- auto* output = OperatorBase::Output<TensorCPU>(0);
+ auto* output = OperatorBase::Output<Tensor>(0, CPU);
VLOG(1) << "Initializing iter counter.";
output->Resize(1);
output->template mutable_data<int64_t>()[0] = 0;
}
}
- IncrementIter(OperatorBase::Output<TensorCPU>(0));
+ IncrementIter(OperatorBase::Output<Tensor>(0, CPU));
return true;
}
};
@@ -67,7 +68,7 @@ class AtomicIterOp final : public Operator<Context> {
bool RunOnDevice() override {
auto& mutex = OperatorBase::Input<std::unique_ptr<std::mutex>>(0);
std::lock_guard<std::mutex> lg(*mutex);
- IncrementIter(OperatorBase::Output<TensorCPU>(0));
+ IncrementIter(OperatorBase::Output<Tensor>(0, CPU));
CAFFE_EVENT(stats_, num_iter);
return true;
}
diff --git a/caffe2/sgd/learning_rate_op.h b/caffe2/sgd/learning_rate_op.h
index 0a47b6c5fd..bd813ce653 100644
--- a/caffe2/sgd/learning_rate_op.h
+++ b/caffe2/sgd/learning_rate_op.h
@@ -27,12 +27,12 @@ class LearningRateOp final : public Operator<Context> {
bool RunOnDevice() override {
int64_t iter =
- OperatorBase::Input<TensorCPU>(0).template data<int64_t>()[0];
+ OperatorBase::Input<Tensor>(0, CPU).template data<int64_t>()[0];
T learning_rate = cur_base_lr_ * (*functor_)(iter);
// Write to output.
auto* output = Output(0);
output->Resize(vector<TIndex>());
- context_.template Copy<T, CPUContext, Context>(
+ context_.template CopyFromCPU<T>(
1, &learning_rate, Output(0)->template mutable_data<T>());
return true;
}
diff --git a/caffe2/sgd/momentum_sgd_op.h b/caffe2/sgd/momentum_sgd_op.h
index 23da3d420c..f3f75f6421 100644
--- a/caffe2/sgd/momentum_sgd_op.h
+++ b/caffe2/sgd/momentum_sgd_op.h
@@ -45,9 +45,10 @@ class MomentumSGDOp final : public Operator<Context> {
nesterov_(OperatorBase::GetSingleArgument<int>("nesterov", 0)) {}
bool RunOnDevice() override {
+ auto device_type = Context::GetDeviceType();
// Iter live on the CPU
- CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD));
- CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM));
+ CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type));
+ CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type));
CAFFE_ENFORCE(Input(LR).size() == 1);
CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENTUM).size());
Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD));
@@ -84,9 +85,10 @@ class MomentumSGDUpdateOp final : public Operator<Context> {
nesterov_(OperatorBase::GetSingleArgument<int>("nesterov", 0)) {}
bool RunOnDevice() override {
+ auto device_type = Context::GetDeviceType();
// Iter live on the CPU
- CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(GRAD));
- CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor<Context>>(MOMENTUM));
+ CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(GRAD, device_type));
+ CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(MOMENTUM, device_type));
CAFFE_ENFORCE_EQ(Input(LR).size(), 1);
CAFFE_ENFORCE_EQ(Input(GRAD).size(), Input(MOMENTUM).size());
Output(OUTPUT_GRAD)->ResizeLike(Input(GRAD));
diff --git a/caffe2/sgd/yellowfin_op.h b/caffe2/sgd/yellowfin_op.h
index 02403ea536..06ecc177c8 100644
--- a/caffe2/sgd/yellowfin_op.h
+++ b/caffe2/sgd/yellowfin_op.h
@@ -126,21 +126,21 @@ CAFFE2_YF_READ_INPUT(SCALARS_MEMORY, scalars_memory)
CAFFE2_YF_READ_INPUT(GRAD, grad)
#undef CAFFE2_YF_READ_OUTPUT
- CAFFE_ENFORCE(OperatorBase::InputIsType<TensorCPU>(ITER));
- CAFFE_ENFORCE_EQ(lr_avg_tensor.size(), 1);
- CAFFE_ENFORCE_EQ(mu_avg_tensor.size(), 1);
- CAFFE_ENFORCE_EQ(param_tensor.ndim(), moment_tensor.ndim());
- CAFFE_ENFORCE_EQ(param_tensor.ndim(), g_avg_tensor.ndim());
- CAFFE_ENFORCE_EQ(param_tensor.ndim(), g2_avg_tensor.ndim());
- CAFFE_ENFORCE_EQ(param_tensor.ndim(), grad_tensor.ndim());
- for (int i = 0; i < param_tensor.ndim(); ++i) {
- CAFFE_ENFORCE_EQ(param_tensor.dim32(i), moment_tensor.dim32(i));
- CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g_avg_tensor.dim32(i));
- CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g2_avg_tensor.dim32(i));
- CAFFE_ENFORCE_EQ(param_tensor.dim32(i), grad_tensor.dim32(i));
+CAFFE_ENFORCE(OperatorBase::InputIsType<Tensor>(ITER, CPU));
+CAFFE_ENFORCE_EQ(lr_avg_tensor.size(), 1);
+CAFFE_ENFORCE_EQ(mu_avg_tensor.size(), 1);
+CAFFE_ENFORCE_EQ(param_tensor.ndim(), moment_tensor.ndim());
+CAFFE_ENFORCE_EQ(param_tensor.ndim(), g_avg_tensor.ndim());
+CAFFE_ENFORCE_EQ(param_tensor.ndim(), g2_avg_tensor.ndim());
+CAFFE_ENFORCE_EQ(param_tensor.ndim(), grad_tensor.ndim());
+for (int i = 0; i < param_tensor.ndim(); ++i) {
+ CAFFE_ENFORCE_EQ(param_tensor.dim32(i), moment_tensor.dim32(i));
+ CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g_avg_tensor.dim32(i));
+ CAFFE_ENFORCE_EQ(param_tensor.dim32(i), g2_avg_tensor.dim32(i));
+ CAFFE_ENFORCE_EQ(param_tensor.dim32(i), grad_tensor.dim32(i));
}
- iter_ = OperatorBase::Input<TensorCPU>(ITER).template data<int64_t>()[0];
+ iter_ = OperatorBase::Input<Tensor>(ITER, CPU).template data<int64_t>()[0];
D_ = param_tensor.size();
@@ -229,8 +229,8 @@ CAFFE2_YF_READ_INPUT(GRAD, grad)
int D_;
// Temporary memory on device, listed all variables used in calculations
-#define CAFFE2_YF_DEFINE_TENSOR(NAME) \
- Tensor<Context> NAME##_tensor_; \
+#define CAFFE2_YF_DEFINE_TENSOR(NAME) \
+ Tensor NAME##_tensor_{Context::GetDeviceType()}; \
T* NAME##_;
CAFFE2_YF_DEFINE_TENSOR(aux_vector)
@@ -255,7 +255,7 @@ CAFFE2_YF_READ_INPUT(GRAD, grad)
CAFFE2_YF_DEFINE_TENSOR(mu_deb)
CAFFE2_YF_DEFINE_TENSOR(variance)
- Tensor<Context> scratch_tensor_;
+ Tensor scratch_tensor_{Context::GetDeviceType()};
#undef CAFFE2_YF_DEFINE_TENSOR