author     Jeff Donahue <jeff.donahue@gmail.com>   2014-07-11 01:48:36 -0700
committer  Jeff Donahue <jeff.donahue@gmail.com>   2014-08-13 11:57:33 -0700
commit     7a3ed9b8edf43895770b63cb4d9f5cacf0dba047 (patch)
tree       25ae32e517230009f913eff910a25316fbc5fb3e
parent     d0cae53dd5291331ca7da0dfef0e7ff54e8e0bac (diff)
Add net tests for loss_weight.
Check that the loss and gradients throughout the net are appropriately scaled for a few loss_weight values, assuming a default weight of 1 in the loss layer only. Also modify test_gradient_check_util to associate a loss of 2 rather than 1 with the top blob, so that loss layer tests fail if they don't scale their diffs.
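
The property these tests exercise: scaling a loss layer's top by a weight w should scale the reported loss and every gradient in the net by that same w. A minimal standalone sketch of that check, using made-up gradient arrays rather than real Caffe blobs:

// Sketch only (not part of this patch): gradients computed with an explicit
// loss weight w should equal w times the gradients computed with the default
// weight of 1, element by element.
#include <cassert>
#include <cmath>
#include <cstddef>

bool GradientsScaledBy(const float* unweighted_diff, const float* weighted_diff,
                       std::size_t count, float loss_weight, float margin) {
  for (std::size_t i = 0; i < count; ++i) {
    if (std::fabs(unweighted_diff[i] * loss_weight - weighted_diff[i]) > margin) {
      return false;
    }
  }
  return true;
}

int main() {
  // Made-up diffs for illustration; the tests below read them from net blobs.
  const float unweighted[] = {0.5f, -1.25f, 2.0f};   // loss_weight == 1
  const float weighted[]   = {1.25f, -3.125f, 5.0f}; // loss_weight == 2.5
  assert(GradientsScaledBy(unweighted, weighted, 3, 2.5f, 1e-5f));
  return 0;
}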
-rw-r--r--   include/caffe/test/test_gradient_check_util.hpp |  37
-rw-r--r--   src/caffe/test/test_net.cpp                     | 166
2 files changed, 176 insertions(+), 27 deletions(-)
diff --git a/include/caffe/test/test_gradient_check_util.hpp b/include/caffe/test/test_gradient_check_util.hpp
index 4cf2cbc9..5a8d382f 100644
--- a/include/caffe/test/test_gradient_check_util.hpp
+++ b/include/caffe/test/test_gradient_check_util.hpp
@@ -57,8 +57,8 @@ class GradientChecker {
const vector<Blob<Dtype>*>& input);
protected:
- Dtype GetObjAndGradient(vector<Blob<Dtype>*>* top, int top_id = -1,
- int top_data_id = -1);
+ Dtype GetObjAndGradient(const Layer<Dtype>& layer, vector<Blob<Dtype>*>* top,
+ int top_id = -1, int top_data_id = -1);
Dtype stepsize_;
Dtype threshold_;
unsigned int seed_;
@@ -97,10 +97,11 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
}
// Compute the gradient analytically using Backward
Caffe::set_random_seed(seed_);
- // Get any loss from the layer
- Dtype computed_objective = layer->Forward(*bottom, top);
+ // Ignore the loss from the layer (it's just the weighted sum of the losses
+ // from the top blobs, whose gradients we may want to test individually).
+ layer->Forward(*bottom, top);
// Get additional loss from the objective
- computed_objective += GetObjAndGradient(top, top_id, top_data_id);
+ GetObjAndGradient(*layer, top, top_id, top_data_id);
layer->Backward(*top, propagate_down, bottom);
// Store computed gradients for all checked blobs
vector<shared_ptr<Blob<Dtype> > >
@@ -131,18 +132,22 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
// i != top_data_id, we know the derivative is 0 by definition, and simply
// check that that's true.
Dtype estimated_gradient = 0;
+ Dtype positive_objective = 0;
+ Dtype negative_objective = 0;
if (!element_wise || (feat_id == top_data_id)) {
// Do finite differencing.
// Compute loss with stepsize_ added to input.
current_blob->mutable_cpu_data()[feat_id] += stepsize_;
Caffe::set_random_seed(seed_);
- Dtype positive_objective = layer->Forward(*bottom, top);
- positive_objective += GetObjAndGradient(top, top_id, top_data_id);
+ layer->Forward(*bottom, top);
+ positive_objective =
+ GetObjAndGradient(*layer, top, top_id, top_data_id);
// Compute loss with stepsize_ subtracted from input.
current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
Caffe::set_random_seed(seed_);
- Dtype negative_objective = layer->Forward(*bottom, top);
- negative_objective += GetObjAndGradient(top, top_id, top_data_id);
+ layer->Forward(*bottom, top);
+ negative_objective =
+ GetObjAndGradient(*layer, top, top_id, top_data_id);
// Recover original input value.
current_blob->mutable_cpu_data()[feat_id] += stepsize_;
estimated_gradient = (positive_objective - negative_objective) /
@@ -160,7 +165,10 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
std::max(fabs(computed_gradient), fabs(estimated_gradient)), 1.);
EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale)
<< "debug: (top_id, top_data_id, blob_id, feat_id)="
- << top_id << "," << top_data_id << "," << blob_id << "," << feat_id;
+ << top_id << "," << top_data_id << "," << blob_id << "," << feat_id
+ << "; feat = " << feature
+ << "; objective+ = " << positive_objective
+ << "; objective- = " << negative_objective;
}
// LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id];
// LOG(ERROR) << "computed gradient: " << computed_gradient
@@ -212,8 +220,8 @@ void GradientChecker<Dtype>::CheckGradientNet(
}
template <typename Dtype>
-Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>* top,
- int top_id, int top_data_id) {
+Dtype GradientChecker<Dtype>::GetObjAndGradient(const Layer<Dtype>& layer,
+ vector<Blob<Dtype>*>* top, int top_id, int top_data_id) {
Dtype loss = 0;
if (top_id < 0) {
// the loss will be half of the sum of squares of all outputs
@@ -236,8 +244,9 @@ Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>* top,
Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
caffe_set(top_blob->count(), Dtype(0), top_blob_diff);
}
- loss = (*top)[top_id]->cpu_data()[top_data_id];
- (*top)[top_id]->mutable_cpu_diff()[top_data_id] = 1.;
+ const Dtype loss_weight = 2;
+ loss = (*top)[top_id]->cpu_data()[top_data_id] * loss_weight;
+ (*top)[top_id]->mutable_cpu_diff()[top_data_id] = loss_weight;
}
return loss;
}
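
The GetObjAndGradient change above hard-codes a top-blob loss weight of 2: the objective is scaled by 2 and the top diff is seeded with 2, so a loss layer whose Backward ignores the top diff produces an analytic gradient that is off by a factor of 2 and fails the finite-difference comparison. A rough numeric sketch of that effect, using y = x^2 as a stand-in objective (values are illustrative, not taken from the checker):

// Rough sketch with a stand-in objective y = x^2 (dy/dx = 2x).
#include <cassert>
#include <cmath>

int main() {
  const double loss_weight = 2.0;  // the weight the checker now seeds into the top diff
  const double x = 0.7, step = 1e-2;
  // Finite-difference estimate of d(loss_weight * y)/dx, as the checker computes it.
  const double positive = loss_weight * (x + step) * (x + step);
  const double negative = loss_weight * (x - step) * (x - step);
  const double estimated = (positive - negative) / (2.0 * step);
  const double scaled_backward   = loss_weight * 2.0 * x;  // Backward respects the top diff
  const double unscaled_backward = 2.0 * x;                // Backward ignores the top diff
  assert(std::fabs(estimated - scaled_backward) < 1e-6);   // passes the check
  assert(std::fabs(estimated - unscaled_backward) > 1e-1); // would fail EXPECT_NEAR
  return 0;
}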
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index 18bc9ad7..acd3bcdd 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -141,7 +141,11 @@ class NetTest : public MultiDeviceTest<TypeParam> {
InitNetFromProtoString(proto);
}
- virtual void InitTrickyNet() {
+ virtual void InitTrickyNet(Dtype* loss_weight = NULL) {
+ ostringstream loss_weight_stream;
+ if (loss_weight) {
+ loss_weight_stream << " top_loss_weight: " << *loss_weight << " ";
+ }
const string& proto =
"name: 'TrickyTestNetwork' "
"layers: { "
@@ -208,19 +212,27 @@ class NetTest : public MultiDeviceTest<TypeParam> {
"} "
"layers: { "
" name: 'loss' "
- " type: SOFTMAX_LOSS "
+ " type: SOFTMAX_LOSS " +
+ loss_weight_stream.str() +
" bottom: 'transformed_data' "
" bottom: 'transformed_label' "
"} ";
InitNetFromProtoString(proto);
}
- virtual void InitUnsharedWeightsNet(const bool bias_term = false,
+ virtual void InitUnsharedWeightsNet(Dtype* loss_weight = NULL,
+ const bool force_backward = false, const bool bias_term = false,
const Dtype blobs_lr_w1 = 1, const Dtype blobs_lr_b1 = 2,
const Dtype blobs_lr_w2 = 1, const Dtype blobs_lr_b2 = 2) {
ostringstream proto;
+ proto << "name: 'UnsharedWeightsNetwork' ";
+ if (force_backward) {
+ proto << "force_backward: true ";
+ }
proto <<
- "name: 'UnsharedWeightsNetwork' "
"layers: { "
" name: 'data' "
" type: DUMMY_DATA "
@@ -286,7 +298,11 @@ class NetTest : public MultiDeviceTest<TypeParam> {
"} "
"layers: { "
" name: 'loss' "
- " type: EUCLIDEAN_LOSS "
+ " type: EUCLIDEAN_LOSS ";
+ if (loss_weight) {
+ proto << " top_loss_weight: " << *loss_weight << " ";
+ }
+ proto <<
" bottom: 'innerproduct1' "
" bottom: 'innerproduct2' "
"} ";
@@ -575,6 +591,128 @@ TYPED_TEST(NetTest, TestBottomNeedBackwardTricky) {
EXPECT_EQ(true, bottom_need_backward[3][1]);
}
+TYPED_TEST(NetTest, TestLossWeightCPU) {
+ Caffe::set_mode(Caffe::CPU);
+ // First, compute the loss and gradients with no top_loss_weight specified.
+ // In this case, the loss weight for the EUCLIDEAN_LOSS layer should default
+ // to 1.
+ vector<Blob<TypeParam>*> bottom;
+ Caffe::set_random_seed(this->seed_);
+ const bool kForceBackward = true;
+ this->InitUnsharedWeightsNet(NULL, kForceBackward);
+ const TypeParam loss = this->net_->ForwardBackward(bottom);
+ const bool kCopyDiff = true;
+ const bool kReshape = true;
+ const vector<shared_ptr<Blob<TypeParam> > >& net_blobs = this->net_->blobs();
+ vector<shared_ptr<Blob<TypeParam> > > blob_grads(net_blobs.size());
+ for (int i = 0; i < net_blobs.size(); ++i) {
+ blob_grads[i].reset(new Blob<TypeParam>());
+ blob_grads[i]->CopyFrom(*net_blobs[i], kCopyDiff, kReshape);
+ }
+ const vector<shared_ptr<Blob<TypeParam> > >& net_params =
+ this->net_->params();
+ vector<shared_ptr<Blob<TypeParam> > > param_grads(net_params.size());
+ for (int i = 0; i < net_params.size(); ++i) {
+ param_grads[i].reset(new Blob<TypeParam>());
+ param_grads[i]->CopyFrom(*net_params[i], kCopyDiff, kReshape);
+ }
+ // Check that the loss is non-trivial, otherwise the test doesn't prove much.
+ const TypeParam kMinLossAbsValue = 1e-2;
+ ASSERT_GE(fabs(loss), kMinLossAbsValue);
+ const TypeParam kErrorMargin = 1e-5;
+ const int kNumLossWeights = 6;
+ TypeParam kLossWeights[kNumLossWeights] = {2, 0, 1, -1, -2.5, 3.7};
+ for (int i = 0; i < kNumLossWeights; ++i) {
+ Caffe::set_random_seed(this->seed_);
+ this->InitUnsharedWeightsNet(&kLossWeights[i], kForceBackward);
+ const TypeParam weighted_loss = this->net_->ForwardBackward(bottom);
+ const TypeParam error_margin = kErrorMargin * fabs(kLossWeights[i]);
+ EXPECT_NEAR(loss * kLossWeights[i], weighted_loss, error_margin)
+ << "loss weight = " << kLossWeights[i];
+ const vector<shared_ptr<Blob<TypeParam> > >& weighted_blobs =
+ this->net_->blobs();
+ ASSERT_EQ(blob_grads.size(), weighted_blobs.size());
+ for (int j = 0; j < blob_grads.size(); ++j) {
+ ASSERT_EQ(blob_grads[j]->count(), weighted_blobs[j]->count());
+ for (int k = 0; k < blob_grads[j]->count(); ++k) {
+ EXPECT_NEAR(blob_grads[j]->cpu_diff()[k] * kLossWeights[i],
+ weighted_blobs[j]->cpu_diff()[k], error_margin);
+ }
+ }
+ const vector<shared_ptr<Blob<TypeParam> > >& weighted_params =
+ this->net_->params();
+ ASSERT_EQ(param_grads.size(), weighted_params.size());
+ for (int j = 0; j < param_grads.size(); ++j) {
+ ASSERT_EQ(param_grads[j]->count(), weighted_params[j]->count());
+ for (int k = 0; k < param_grads[j]->count(); ++k) {
+ EXPECT_NEAR(param_grads[j]->cpu_diff()[k] * kLossWeights[i],
+ weighted_params[j]->cpu_diff()[k], error_margin);
+ }
+ }
+ }
+}
+
+TYPED_TEST(NetTest, TestLossWeightGPU) {
+ Caffe::set_mode(Caffe::GPU);
+ // First, compute the loss and gradients with no top_loss_weight specified.
+ // In this case, the loss weight for the EUCLIDEAN_LOSS layer should default
+ // to 1.
+ vector<Blob<TypeParam>*> bottom;
+ Caffe::set_random_seed(this->seed_);
+ const bool kForceBackward = true;
+ this->InitUnsharedWeightsNet(NULL, kForceBackward);
+ const TypeParam loss = this->net_->ForwardBackward(bottom);
+ const bool kCopyDiff = true;
+ const bool kReshape = true;
+ const vector<shared_ptr<Blob<TypeParam> > >& net_blobs = this->net_->blobs();
+ vector<shared_ptr<Blob<TypeParam> > > blob_grads(net_blobs.size());
+ for (int i = 0; i < net_blobs.size(); ++i) {
+ blob_grads[i].reset(new Blob<TypeParam>());
+ blob_grads[i]->CopyFrom(*net_blobs[i], kCopyDiff, kReshape);
+ }
+ const vector<shared_ptr<Blob<TypeParam> > >& net_params =
+ this->net_->params();
+ vector<shared_ptr<Blob<TypeParam> > > param_grads(net_params.size());
+ for (int i = 0; i < net_params.size(); ++i) {
+ param_grads[i].reset(new Blob<TypeParam>());
+ param_grads[i]->CopyFrom(*net_params[i], kCopyDiff, kReshape);
+ }
+ // Check that the loss is non-trivial, otherwise the test doesn't prove much.
+ const TypeParam kMinLossAbsValue = 1e-2;
+ ASSERT_GE(fabs(loss), kMinLossAbsValue);
+ const TypeParam kErrorMargin = 1e-4;
+ const int kNumLossWeights = 6;
+ TypeParam kLossWeights[kNumLossWeights] = {2, 0, 1, -1, -2.5, 3.7};
+ for (int i = 0; i < kNumLossWeights; ++i) {
+ Caffe::set_random_seed(this->seed_);
+ this->InitUnsharedWeightsNet(&kLossWeights[i], kForceBackward);
+ const TypeParam weighted_loss = this->net_->ForwardBackward(bottom);
+ const TypeParam error_margin = kErrorMargin * fabs(kLossWeights[i]);
+ EXPECT_NEAR(loss * kLossWeights[i], weighted_loss, error_margin)
+ << "loss weight = " << kLossWeights[i];
+ const vector<shared_ptr<Blob<TypeParam> > >& weighted_blobs =
+ this->net_->blobs();
+ ASSERT_EQ(blob_grads.size(), weighted_blobs.size());
+ for (int j = 0; j < blob_grads.size(); ++j) {
+ ASSERT_EQ(blob_grads[j]->count(), weighted_blobs[j]->count());
+ for (int k = 0; k < blob_grads[j]->count(); ++k) {
+ EXPECT_NEAR(blob_grads[j]->cpu_diff()[k] * kLossWeights[i],
+ weighted_blobs[j]->cpu_diff()[k], error_margin);
+ }
+ }
+ const vector<shared_ptr<Blob<TypeParam> > >& weighted_params =
+ this->net_->params();
+ ASSERT_EQ(param_grads.size(), weighted_params.size());
+ for (int j = 0; j < param_grads.size(); ++j) {
+ ASSERT_EQ(param_grads[j]->count(), weighted_params[j]->count());
+ for (int k = 0; k < param_grads[j]->count(); ++k) {
+ EXPECT_NEAR(param_grads[j]->cpu_diff()[k] * kLossWeights[i],
+ weighted_params[j]->cpu_diff()[k], error_margin);
+ }
+ }
+ }
+}
+
TYPED_TEST(NetTest, TestUnsharedWeightsDataNet) {
typedef typename TypeParam::Dtype Dtype;
this->InitUnsharedWeightsNet();
@@ -722,12 +860,14 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
typedef typename TypeParam::Dtype Dtype;
vector<Blob<Dtype>*> bottom;
const bool kBiasTerm = true;
+ const bool kForceBackward = false;
+ Dtype* kLossWeight = NULL;
// Run the net with all params learned; check that gradients are non-zero.
Caffe::set_random_seed(this->seed_);
Dtype blobs_lr_w1 = 1, blobs_lr_w2 = 1, blobs_lr_b1 = 2, blobs_lr_b2 = 2;
- this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
- blobs_lr_b1, blobs_lr_b2);
+ this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+ blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
this->net_->Forward(bottom);
this->net_->Backward();
const vector<shared_ptr<Blob<Dtype> > >& params = this->net_->params();
@@ -746,8 +886,8 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
// gradients.
Caffe::set_random_seed(this->seed_);
blobs_lr_w1 *= 2, blobs_lr_w2 *= 2, blobs_lr_b1 *= 2, blobs_lr_b2 *= 2;
- this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
- blobs_lr_b1, blobs_lr_b2);
+ this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+ blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
this->net_->Forward(bottom);
this->net_->Backward();
const vector<shared_ptr<Blob<Dtype> > >& params2 = this->net_->params();
@@ -762,8 +902,8 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
// gradients for those.
Caffe::set_random_seed(this->seed_);
blobs_lr_w1 = 1, blobs_lr_w2 = 0, blobs_lr_b1 = 0, blobs_lr_b2 = 1;
- this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
- blobs_lr_b1, blobs_lr_b2);
+ this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+ blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
this->net_->Forward(bottom);
this->net_->Backward();
const vector<shared_ptr<Blob<Dtype> > >& params3 = this->net_->params();
@@ -781,8 +921,8 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
// Change the opposite subset of the learning rates to zero.
Caffe::set_random_seed(this->seed_);
blobs_lr_w1 = 0, blobs_lr_w2 = 1, blobs_lr_b1 = 1, blobs_lr_b2 = 0;
- this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
- blobs_lr_b1, blobs_lr_b2);
+ this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+ blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
this->net_->Forward(bottom);
this->net_->Backward();
const vector<shared_ptr<Blob<Dtype> > >& params4 = this->net_->params();
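
For reference, the syntax the new tests exercise places a top_loss_weight field inside the loss layer definition, as in the fixtures above. A minimal fragment in the same embedded-prototxt style (the layer type, bottom names, and 3.7 value are taken from the test fixtures; the fragment itself is illustrative, not part of the patch):

// Illustrative fragment in the embedded-prototxt style used by the fixtures
// above: an explicit top_loss_weight on a loss layer (the default weight is 1).
const char* kWeightedLossLayer =
    "layers: { "
    "  name: 'loss' "
    "  type: EUCLIDEAN_LOSS "
    "  top_loss_weight: 3.7 "
    "  bottom: 'innerproduct1' "
    "  bottom: 'innerproduct2' "
    "} ";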