author     Jeff Donahue <jeff.donahue@gmail.com>   2014-07-11 01:48:36 -0700
committer  Jeff Donahue <jeff.donahue@gmail.com>   2014-08-13 11:57:33 -0700
commit     7a3ed9b8edf43895770b63cb4d9f5cacf0dba047 (patch)
tree       25ae32e517230009f913eff910a25316fbc5fb3e
parent     d0cae53dd5291331ca7da0dfef0e7ff54e8e0bac (diff)
Add net tests for loss_weight.
Check that the loss and gradients throughout the net are appropriately scaled
for a few loss_weight values, assuming a default weight of 1 in the loss layer
only. Also modify test_gradient_check_util to associate a loss of 2 rather
than 1 with the top blob, so that loss layer tests fail if they don't scale
their diffs.
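As a purely illustrative sketch (not part of this commit), the convention the tests rely on can be shown with a toy loss f(x) = 0.5 * sum(x_i^2): a loss weight w must scale both the reported loss and the diff propagated from the top blob, so the analytic gradient of the weighted objective is w * x_i. The checker's hard-coded top weight of 2 catches a layer that ignores its top diff, because the finite difference of the weighted objective no longer matches the unscaled gradient. All names below (Forward, Backward, kLossWeight, etc.) are made up for the sketch.

    // Standalone sketch of the finite-difference check with a nontrivial loss weight.
    #include <cassert>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Toy loss: f(x) = 0.5 * sum(x_i^2).
    double Forward(const std::vector<double>& x) {
      double loss = 0;
      for (size_t i = 0; i < x.size(); ++i) loss += 0.5 * x[i] * x[i];
      return loss;
    }

    // A correct layer multiplies its local gradient by the incoming top diff
    // (the loss weight); a buggy one would drop top_diff and return x_i.
    void Backward(const std::vector<double>& x, double top_diff,
                  std::vector<double>* dx) {
      for (size_t i = 0; i < x.size(); ++i) (*dx)[i] = top_diff * x[i];
    }

    int main() {
      const double kLossWeight = 2;   // analogous to the checker's hard-coded 2
      const double kStep = 1e-4;
      const double kThreshold = 1e-6;
      std::vector<double> x = {0.3, -1.2, 2.5};
      std::vector<double> dx(x.size());
      Backward(x, kLossWeight, &dx);  // analytic gradient of kLossWeight * f(x)
      for (size_t i = 0; i < x.size(); ++i) {
        std::vector<double> xp = x, xm = x;
        xp[i] += kStep;
        xm[i] -= kStep;
        // Central finite difference of the *weighted* objective.
        const double estimated =
            (kLossWeight * Forward(xp) - kLossWeight * Forward(xm)) / (2 * kStep);
        assert(std::fabs(dx[i] - estimated) < kThreshold);
      }
      std::printf("gradient check passed\n");
      return 0;
    }

A Backward that dropped top_diff would produce x_i instead of 2 * x_i and trip the assert, which is exactly the failure mode the loss-of-2 convention is meant to expose.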
-rw-r--r--  include/caffe/test/test_gradient_check_util.hpp |  37
-rw-r--r--  src/caffe/test/test_net.cpp                     | 166
2 files changed, 176 insertions, 27 deletions
diff --git a/include/caffe/test/test_gradient_check_util.hpp b/include/caffe/test/test_gradient_check_util.hpp
index 4cf2cbc9..5a8d382f 100644
--- a/include/caffe/test/test_gradient_check_util.hpp
+++ b/include/caffe/test/test_gradient_check_util.hpp
@@ -57,8 +57,8 @@ class GradientChecker {
       const vector<Blob<Dtype>*>& input);

  protected:
-  Dtype GetObjAndGradient(vector<Blob<Dtype>*>* top, int top_id = -1,
-      int top_data_id = -1);
+  Dtype GetObjAndGradient(const Layer<Dtype>& layer, vector<Blob<Dtype>*>* top,
+      int top_id = -1, int top_data_id = -1);
   Dtype stepsize_;
   Dtype threshold_;
   unsigned int seed_;
@@ -97,10 +97,11 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
   }
   // Compute the gradient analytically using Backward
   Caffe::set_random_seed(seed_);
-  // Get any loss from the layer
-  Dtype computed_objective = layer->Forward(*bottom, top);
+  // Ignore the loss from the layer (it's just the weighted sum of the losses
+  // from the top blobs, whose gradients we may want to test individually).
+  layer->Forward(*bottom, top);
   // Get additional loss from the objective
-  computed_objective += GetObjAndGradient(top, top_id, top_data_id);
+  GetObjAndGradient(*layer, top, top_id, top_data_id);
   layer->Backward(*top, propagate_down, bottom);
   // Store computed gradients for all checked blobs
   vector<shared_ptr<Blob<Dtype> > >
@@ -131,18 +132,22 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
      // i != top_data_id, we know the derivative is 0 by definition, and simply
      // check that that's true.
      Dtype estimated_gradient = 0;
+      Dtype positive_objective = 0;
+      Dtype negative_objective = 0;
      if (!element_wise || (feat_id == top_data_id)) {
        // Do finite differencing.
        // Compute loss with stepsize_ added to input.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        Caffe::set_random_seed(seed_);
-        Dtype positive_objective = layer->Forward(*bottom, top);
-        positive_objective += GetObjAndGradient(top, top_id, top_data_id);
+        layer->Forward(*bottom, top);
+        positive_objective =
+            GetObjAndGradient(*layer, top, top_id, top_data_id);
        // Compute loss with stepsize_ subtracted from input.
        current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
        Caffe::set_random_seed(seed_);
-        Dtype negative_objective = layer->Forward(*bottom, top);
-        negative_objective += GetObjAndGradient(top, top_id, top_data_id);
+        layer->Forward(*bottom, top);
+        negative_objective =
+            GetObjAndGradient(*layer, top, top_id, top_data_id);
        // Recover original input value.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        estimated_gradient = (positive_objective - negative_objective) /
@@ -160,7 +165,10 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
          std::max(fabs(computed_gradient), fabs(estimated_gradient)), 1.);
      EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale)
        << "debug: (top_id, top_data_id, blob_id, feat_id)="
-        << top_id << "," << top_data_id << "," << blob_id << "," << feat_id;
+        << top_id << "," << top_data_id << "," << blob_id << "," << feat_id
+        << "; feat = " << feature
+        << "; objective+ = " << positive_objective
+        << "; objective- = " << negative_objective;
     }
     // LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id];
     // LOG(ERROR) << "computed gradient: " << computed_gradient
@@ -212,8 +220,8 @@ void GradientChecker<Dtype>::CheckGradientNet(
 }

 template <typename Dtype>
-Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>* top,
-    int top_id, int top_data_id) {
+Dtype GradientChecker<Dtype>::GetObjAndGradient(const Layer<Dtype>& layer,
+    vector<Blob<Dtype>*>* top, int top_id, int top_data_id) {
   Dtype loss = 0;
   if (top_id < 0) {
     // the loss will be half of the sum of squares of all outputs
@@ -236,8 +244,9 @@ Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>* top,
       Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
       caffe_set(top_blob->count(), Dtype(0), top_blob_diff);
     }
-    loss = (*top)[top_id]->cpu_data()[top_data_id];
-    (*top)[top_id]->mutable_cpu_diff()[top_data_id] = 1.;
+    const Dtype loss_weight = 2;
+    loss = (*top)[top_id]->cpu_data()[top_data_id] * loss_weight;
+    (*top)[top_id]->mutable_cpu_diff()[top_data_id] = loss_weight;
   }
   return loss;
 }
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index 18bc9ad7..acd3bcdd 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -141,7 +141,11 @@ class NetTest : public MultiDeviceTest<TypeParam> {
     InitNetFromProtoString(proto);
   }

-  virtual void InitTrickyNet() {
+  virtual void InitTrickyNet(Dtype* loss_weight = NULL) {
+    ostringstream loss_weight_stream;
+    if (loss_weight) {
+      loss_weight_stream << " top_loss_weight: " << *loss_weight << " ";
+    }
     const string& proto =
         "name: 'TrickyTestNetwork' "
         "layers: { "
@@ -208,19 +212,24 @@ class NetTest : public MultiDeviceTest<TypeParam> {
         "} "
         "layers: { "
         " name: 'loss' "
-        " type: SOFTMAX_LOSS "
+        " type: SOFTMAX_LOSS " +
+        loss_weight_stream.str() +
         " bottom: 'transformed_data' "
         " bottom: 'transformed_label' "
         "} ";
     InitNetFromProtoString(proto);
   }

-  virtual void InitUnsharedWeightsNet(const bool bias_term = false,
+  virtual void InitUnsharedWeightsNet(Dtype* loss_weight = NULL,
+      const bool force_backward = false, const bool bias_term = false,
       const Dtype blobs_lr_w1 = 1, const Dtype blobs_lr_b1 = 2,
       const Dtype blobs_lr_w2 = 1, const Dtype blobs_lr_b2 = 2) {
     ostringstream proto;
+    proto << "name: 'UnsharedWeightsNetwork' ";
+    if (force_backward) {
+      proto << "force_backward: true ";
+    }
     proto <<
-        "name: 'UnsharedWeightsNetwork' "
         "layers: { "
         " name: 'data' "
         " type: DUMMY_DATA "
@@ -286,7 +298,11 @@ class NetTest : public MultiDeviceTest<TypeParam> {
         "} "
         "layers: { "
         " name: 'loss' "
-        " type: EUCLIDEAN_LOSS "
+        " type: EUCLIDEAN_LOSS ";
+    if (loss_weight) {
+      proto << " top_loss_weight: " << *loss_weight << " ";
+    }
+    proto <<
         " bottom: 'innerproduct1' "
         " bottom: 'innerproduct2' "
         "} ";
@@ -575,6 +591,128 @@ TYPED_TEST(NetTest, TestBottomNeedBackwardTricky) {
   EXPECT_EQ(true, bottom_need_backward[3][1]);
 }

+TYPED_TEST(NetTest, TestLossWeightCPU) {
+  Caffe::set_mode(Caffe::CPU);
+  // First, compute the loss and gradients with no top_loss_weight specified.
+  // In this case, the loss weight for the EUCLIDEAN_LOSS layer should default
+  // to 1.
+  vector<Blob<TypeParam>*> bottom;
+  Caffe::set_random_seed(this->seed_);
+  const bool kForceBackward = true;
+  this->InitUnsharedWeightsNet(NULL, kForceBackward);
+  const TypeParam loss = this->net_->ForwardBackward(bottom);
+  const bool kCopyDiff = true;
+  const bool kReshape = true;
+  const vector<shared_ptr<Blob<TypeParam> > >& net_blobs = this->net_->blobs();
+  vector<shared_ptr<Blob<TypeParam> > > blob_grads(net_blobs.size());
+  for (int i = 0; i < net_blobs.size(); ++i) {
+    blob_grads[i].reset(new Blob<TypeParam>());
+    blob_grads[i]->CopyFrom(*net_blobs[i], kCopyDiff, kReshape);
+  }
+  const vector<shared_ptr<Blob<TypeParam> > >& net_params =
+      this->net_->params();
+  vector<shared_ptr<Blob<TypeParam> > > param_grads(net_params.size());
+  for (int i = 0; i < net_params.size(); ++i) {
+    param_grads[i].reset(new Blob<TypeParam>());
+    param_grads[i]->CopyFrom(*net_params[i], kCopyDiff, kReshape);
+  }
+  // Check that the loss is non-trivial, otherwise the test doesn't prove much.
+  const TypeParam kMinLossAbsValue = 1e-2;
+  ASSERT_GE(fabs(loss), kMinLossAbsValue);
+  const TypeParam kErrorMargin = 1e-5;
+  const int kNumLossWeights = 6;
+  TypeParam kLossWeights[kNumLossWeights] = {2, 0, 1, -1, -2.5, 3.7};
+  for (int i = 0; i < kNumLossWeights; ++i) {
+    Caffe::set_random_seed(this->seed_);
+    this->InitUnsharedWeightsNet(&kLossWeights[i], kForceBackward);
+    const TypeParam weighted_loss = this->net_->ForwardBackward(bottom);
+    const TypeParam error_margin = kErrorMargin * fabs(kLossWeights[i]);
+    EXPECT_NEAR(loss * kLossWeights[i], weighted_loss, error_margin)
+        << "loss weight = " << kLossWeights[i];
+    const vector<shared_ptr<Blob<TypeParam> > >& weighted_blobs =
+        this->net_->blobs();
+    ASSERT_EQ(blob_grads.size(), weighted_blobs.size());
+    for (int j = 0; j < blob_grads.size(); ++j) {
+      ASSERT_EQ(blob_grads[j]->count(), weighted_blobs[j]->count());
+      for (int k = 0; k < blob_grads[j]->count(); ++k) {
+        EXPECT_NEAR(blob_grads[j]->cpu_diff()[k] * kLossWeights[i],
+            weighted_blobs[j]->cpu_diff()[k], error_margin);
+      }
+    }
+    const vector<shared_ptr<Blob<TypeParam> > >& weighted_params =
+        this->net_->params();
+    ASSERT_EQ(param_grads.size(), weighted_params.size());
+    for (int j = 0; j < param_grads.size(); ++j) {
+      ASSERT_EQ(param_grads[j]->count(), weighted_params[j]->count());
+      for (int k = 0; k < param_grads[j]->count(); ++k) {
+        EXPECT_NEAR(param_grads[j]->cpu_diff()[k] * kLossWeights[i],
+            weighted_params[j]->cpu_diff()[k], error_margin);
+      }
+    }
+  }
+}
+
+TYPED_TEST(NetTest, TestLossWeightGPU) {
+  Caffe::set_mode(Caffe::GPU);
+  // First, compute the loss and gradients with no top_loss_weight specified.
+  // In this case, the loss weight for the EUCLIDEAN_LOSS layer should default
+  // to 1.
+  vector<Blob<TypeParam>*> bottom;
+  Caffe::set_random_seed(this->seed_);
+  const bool kForceBackward = true;
+  this->InitUnsharedWeightsNet(NULL, kForceBackward);
+  const TypeParam loss = this->net_->ForwardBackward(bottom);
+  const bool kCopyDiff = true;
+  const bool kReshape = true;
+  const vector<shared_ptr<Blob<TypeParam> > >& net_blobs = this->net_->blobs();
+  vector<shared_ptr<Blob<TypeParam> > > blob_grads(net_blobs.size());
+  for (int i = 0; i < net_blobs.size(); ++i) {
+    blob_grads[i].reset(new Blob<TypeParam>());
+    blob_grads[i]->CopyFrom(*net_blobs[i], kCopyDiff, kReshape);
+  }
+  const vector<shared_ptr<Blob<TypeParam> > >& net_params =
+      this->net_->params();
+  vector<shared_ptr<Blob<TypeParam> > > param_grads(net_params.size());
+  for (int i = 0; i < net_params.size(); ++i) {
+    param_grads[i].reset(new Blob<TypeParam>());
+    param_grads[i]->CopyFrom(*net_params[i], kCopyDiff, kReshape);
+  }
+  // Check that the loss is non-trivial, otherwise the test doesn't prove much.
+  const TypeParam kMinLossAbsValue = 1e-2;
+  ASSERT_GE(fabs(loss), kMinLossAbsValue);
+  const TypeParam kErrorMargin = 1e-4;
+  const int kNumLossWeights = 6;
+  TypeParam kLossWeights[kNumLossWeights] = {2, 0, 1, -1, -2.5, 3.7};
+  for (int i = 0; i < kNumLossWeights; ++i) {
+    Caffe::set_random_seed(this->seed_);
+    this->InitUnsharedWeightsNet(&kLossWeights[i], kForceBackward);
+    const TypeParam weighted_loss = this->net_->ForwardBackward(bottom);
+    const TypeParam error_margin = kErrorMargin * fabs(kLossWeights[i]);
+    EXPECT_NEAR(loss * kLossWeights[i], weighted_loss, error_margin)
+        << "loss weight = " << kLossWeights[i];
+    const vector<shared_ptr<Blob<TypeParam> > >& weighted_blobs =
+        this->net_->blobs();
+    ASSERT_EQ(blob_grads.size(), weighted_blobs.size());
+    for (int j = 0; j < blob_grads.size(); ++j) {
+      ASSERT_EQ(blob_grads[j]->count(), weighted_blobs[j]->count());
+      for (int k = 0; k < blob_grads[j]->count(); ++k) {
+        EXPECT_NEAR(blob_grads[j]->cpu_diff()[k] * kLossWeights[i],
+            weighted_blobs[j]->cpu_diff()[k], error_margin);
+      }
+    }
+    const vector<shared_ptr<Blob<TypeParam> > >& weighted_params =
+        this->net_->params();
+    ASSERT_EQ(param_grads.size(), weighted_params.size());
+    for (int j = 0; j < param_grads.size(); ++j) {
+      ASSERT_EQ(param_grads[j]->count(), weighted_params[j]->count());
+      for (int k = 0; k < param_grads[j]->count(); ++k) {
+        EXPECT_NEAR(param_grads[j]->cpu_diff()[k] * kLossWeights[i],
+            weighted_params[j]->cpu_diff()[k], error_margin);
+      }
+    }
+  }
+}
+
 TYPED_TEST(NetTest, TestUnsharedWeightsDataNet) {
   typedef typename TypeParam::Dtype Dtype;
   this->InitUnsharedWeightsNet();
@@ -722,12 +860,14 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
   typedef typename TypeParam::Dtype Dtype;
   vector<Blob<Dtype>*> bottom;
   const bool kBiasTerm = true;
+  const bool kForceBackward = false;
+  Dtype* kLossWeight = NULL;
   // Run the net with all params learned; check that gradients are non-zero.
   Caffe::set_random_seed(this->seed_);
   Dtype blobs_lr_w1 = 1, blobs_lr_w2 = 1, blobs_lr_b1 = 2, blobs_lr_b2 = 2;
-  this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
-      blobs_lr_b1, blobs_lr_b2);
+  this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+      blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
   this->net_->Forward(bottom);
   this->net_->Backward();
   const vector<shared_ptr<Blob<Dtype> > >& params = this->net_->params();
@@ -746,8 +886,8 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
   // gradients.
   Caffe::set_random_seed(this->seed_);
   blobs_lr_w1 *= 2, blobs_lr_w2 *= 2, blobs_lr_b1 *= 2, blobs_lr_b2 *= 2;
-  this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
-      blobs_lr_b1, blobs_lr_b2);
+  this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+      blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
   this->net_->Forward(bottom);
   this->net_->Backward();
   const vector<shared_ptr<Blob<Dtype> > >& params2 = this->net_->params();
@@ -762,8 +902,8 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
   // gradients for those.
   Caffe::set_random_seed(this->seed_);
   blobs_lr_w1 = 1, blobs_lr_w2 = 0, blobs_lr_b1 = 0, blobs_lr_b2 = 1;
-  this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
-      blobs_lr_b1, blobs_lr_b2);
+  this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+      blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
   this->net_->Forward(bottom);
   this->net_->Backward();
   const vector<shared_ptr<Blob<Dtype> > >& params3 = this->net_->params();
@@ -781,8 +921,8 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
   // Change the opposite subset of the learning rates to zero.
   Caffe::set_random_seed(this->seed_);
   blobs_lr_w1 = 0, blobs_lr_w2 = 1, blobs_lr_b1 = 1, blobs_lr_b2 = 0;
-  this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
-      blobs_lr_b1, blobs_lr_b2);
+  this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+      blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
   this->net_->Forward(bottom);
   this->net_->Backward();
   const vector<shared_ptr<Blob<Dtype> > >& params4 = this->net_->params();