author     Jeff Donahue <jeff.donahue@gmail.com>   2014-07-11 01:48:36 -0700
committer  Jeff Donahue <jeff.donahue@gmail.com>   2014-08-13 11:57:33 -0700
commit     7a3ed9b8edf43895770b63cb4d9f5cacf0dba047 (patch)
tree       25ae32e517230009f913eff910a25316fbc5fb3e
parent     d0cae53dd5291331ca7da0dfef0e7ff54e8e0bac (diff)
Add net tests for loss_weight.
Check that the loss and gradients throughout the net are appropriately scaled
for a few loss_weight values, assuming a default weight of 1 in the loss layer
only. Also modify test_gradient_check_util to associate a loss of 2 rather
than 1 with the top blob, so that loss layer tests fail if they don't scale
their diffs.
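As a purely illustrative sketch (not part of this commit), the convention the tests rely on can be shown with a toy loss f(x) = 0.5 * sum(x_i^2): a loss weight w must scale both the reported loss and the diff propagated from the top blob, so the analytic gradient of the weighted objective is w * x_i. The checker's hard-coded top weight of 2 catches a layer that ignores its top diff, because the finite difference of the weighted objective no longer matches the unscaled gradient. All names below (Forward, Backward, kLossWeight, etc.) are made up for the sketch.

    // Standalone sketch of the finite-difference check with a nontrivial loss weight.
    #include <cassert>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Toy loss: f(x) = 0.5 * sum(x_i^2).
    double Forward(const std::vector<double>& x) {
      double loss = 0;
      for (size_t i = 0; i < x.size(); ++i) loss += 0.5 * x[i] * x[i];
      return loss;
    }

    // A correct layer multiplies its local gradient by the incoming top diff
    // (the loss weight); a buggy one would drop top_diff and return x_i.
    void Backward(const std::vector<double>& x, double top_diff,
                  std::vector<double>* dx) {
      for (size_t i = 0; i < x.size(); ++i) (*dx)[i] = top_diff * x[i];
    }

    int main() {
      const double kLossWeight = 2;   // analogous to the checker's hard-coded 2
      const double kStep = 1e-4;
      const double kThreshold = 1e-6;
      std::vector<double> x = {0.3, -1.2, 2.5};
      std::vector<double> dx(x.size());
      Backward(x, kLossWeight, &dx);  // analytic gradient of kLossWeight * f(x)
      for (size_t i = 0; i < x.size(); ++i) {
        std::vector<double> xp = x, xm = x;
        xp[i] += kStep;
        xm[i] -= kStep;
        // Central finite difference of the *weighted* objective.
        const double estimated =
            (kLossWeight * Forward(xp) - kLossWeight * Forward(xm)) / (2 * kStep);
        assert(std::fabs(dx[i] - estimated) < kThreshold);
      }
      std::printf("gradient check passed\n");
      return 0;
    }

A Backward that dropped top_diff would produce x_i instead of 2 * x_i and trip the assert, which is exactly the failure mode the loss-of-2 convention is meant to expose.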
-rw-r--r--  include/caffe/test/test_gradient_check_util.hpp |  37
-rw-r--r--  src/caffe/test/test_net.cpp                     | 166
2 files changed, 176 insertions, 27 deletions
diff --git a/include/caffe/test/test_gradient_check_util.hpp b/include/caffe/test/test_gradient_check_util.hpp
index 4cf2cbc9..5a8d382f 100644
--- a/include/caffe/test/test_gradient_check_util.hpp
+++ b/include/caffe/test/test_gradient_check_util.hpp
@@ -57,8 +57,8 @@ class GradientChecker {
       const vector<Blob<Dtype>*>& input);

  protected:
-  Dtype GetObjAndGradient(vector<Blob<Dtype>*>* top, int top_id = -1,
-      int top_data_id = -1);
+  Dtype GetObjAndGradient(const Layer<Dtype>& layer, vector<Blob<Dtype>*>* top,
+      int top_id = -1, int top_data_id = -1);
   Dtype stepsize_;
   Dtype threshold_;
   unsigned int seed_;
@@ -97,10 +97,11 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
   }
   // Compute the gradient analytically using Backward
   Caffe::set_random_seed(seed_);
-  // Get any loss from the layer
-  Dtype computed_objective = layer->Forward(*bottom, top);
+  // Ignore the loss from the layer (it's just the weighted sum of the losses
+  // from the top blobs, whose gradients we may want to test individually).
+  layer->Forward(*bottom, top);
   // Get additional loss from the objective
-  computed_objective += GetObjAndGradient(top, top_id, top_data_id);
+  GetObjAndGradient(*layer, top, top_id, top_data_id);
   layer->Backward(*top, propagate_down, bottom);
   // Store computed gradients for all checked blobs
   vector<shared_ptr<Blob<Dtype> > >
@@ -131,18 +132,22 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
      // i != top_data_id, we know the derivative is 0 by definition, and simply
      // check that that's true.
      Dtype estimated_gradient = 0;
+      Dtype positive_objective = 0;
+      Dtype negative_objective = 0;
      if (!element_wise || (feat_id == top_data_id)) {
        // Do finite differencing.
        // Compute loss with stepsize_ added to input.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        Caffe::set_random_seed(seed_);
-        Dtype positive_objective = layer->Forward(*bottom, top);
-        positive_objective += GetObjAndGradient(top, top_id, top_data_id);
+        layer->Forward(*bottom, top);
+        positive_objective =
+            GetObjAndGradient(*layer, top, top_id, top_data_id);
        // Compute loss with stepsize_ subtracted from input.
        current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
        Caffe::set_random_seed(seed_);
-        Dtype negative_objective = layer->Forward(*bottom, top);
-        negative_objective += GetObjAndGradient(top, top_id, top_data_id);
+        layer->Forward(*bottom, top);
+        negative_objective =
+            GetObjAndGradient(*layer, top, top_id, top_data_id);
        // Recover original input value.
        current_blob->mutable_cpu_data()[feat_id] += stepsize_;
        estimated_gradient = (positive_objective - negative_objective) /
@@ -160,7 +165,10 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>* layer,
          std::max(fabs(computed_gradient), fabs(estimated_gradient)), 1.);
      EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale)
        << "debug: (top_id, top_data_id, blob_id, feat_id)="
-        << top_id << "," << top_data_id << "," << blob_id << "," << feat_id;
+        << top_id << "," << top_data_id << "," << blob_id << "," << feat_id
+        << "; feat = " << feature
+        << "; objective+ = " << positive_objective
+        << "; objective- = " << negative_objective;
     }
     // LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id];
     // LOG(ERROR) << "computed gradient: " << computed_gradient
@@ -212,8 +220,8 @@ void GradientChecker<Dtype>::CheckGradientNet(
 }

 template <typename Dtype>
-Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>* top,
-    int top_id, int top_data_id) {
+Dtype GradientChecker<Dtype>::GetObjAndGradient(const Layer<Dtype>& layer,
+    vector<Blob<Dtype>*>* top, int top_id, int top_data_id) {
   Dtype loss = 0;
   if (top_id < 0) {
     // the loss will be half of the sum of squares of all outputs
@@ -236,8 +244,9 @@ Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>* top,
       Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
       caffe_set(top_blob->count(), Dtype(0), top_blob_diff);
     }
-    loss = (*top)[top_id]->cpu_data()[top_data_id];
-    (*top)[top_id]->mutable_cpu_diff()[top_data_id] = 1.;
+    const Dtype loss_weight = 2;
+    loss = (*top)[top_id]->cpu_data()[top_data_id] * loss_weight;
+    (*top)[top_id]->mutable_cpu_diff()[top_data_id] = loss_weight;
   }
   return loss;
 }
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index 18bc9ad7..acd3bcdd 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -141,7 +141,11 @@ class NetTest : public MultiDeviceTest<TypeParam> {
     InitNetFromProtoString(proto);
   }

-  virtual void InitTrickyNet() {
+  virtual void InitTrickyNet(Dtype* loss_weight = NULL) {
+    ostringstream loss_weight_stream;
+    if (loss_weight) {
+      loss_weight_stream << " top_loss_weight: " << *loss_weight << " ";
+    }
     const string& proto =
         "name: 'TrickyTestNetwork' "
         "layers: { "
@@ -208,19 +212,24 @@ class NetTest : public MultiDeviceTest<TypeParam> {
         "} "
         "layers: { "
         " name: 'loss' "
-        " type: SOFTMAX_LOSS "
+        " type: SOFTMAX_LOSS " +
+        loss_weight_stream.str() +
         " bottom: 'transformed_data' "
         " bottom: 'transformed_label' "
         "} ";
     InitNetFromProtoString(proto);
   }

-  virtual void InitUnsharedWeightsNet(const bool bias_term = false,
+  virtual void InitUnsharedWeightsNet(Dtype* loss_weight = NULL,
+      const bool force_backward = false, const bool bias_term = false,
       const Dtype blobs_lr_w1 = 1, const Dtype blobs_lr_b1 = 2,
       const Dtype blobs_lr_w2 = 1, const Dtype blobs_lr_b2 = 2) {
     ostringstream proto;
+    proto << "name: 'UnsharedWeightsNetwork' ";
+    if (force_backward) {
+      proto << "force_backward: true ";
+    }
     proto <<
-        "name: 'UnsharedWeightsNetwork' "
         "layers: { "
         " name: 'data' "
         " type: DUMMY_DATA "
@@ -286,7 +298,11 @@ class NetTest : public MultiDeviceTest<TypeParam> {
         "} "
         "layers: { "
         " name: 'loss' "
-        " type: EUCLIDEAN_LOSS "
+        " type: EUCLIDEAN_LOSS ";
+    if (loss_weight) {
+      proto << " top_loss_weight: " << *loss_weight << " ";
+    }
+    proto <<
         " bottom: 'innerproduct1' "
         " bottom: 'innerproduct2' "
         "} ";
@@ -575,6 +591,128 @@ TYPED_TEST(NetTest, TestBottomNeedBackwardTricky) {
   EXPECT_EQ(true, bottom_need_backward[3][1]);
 }

+TYPED_TEST(NetTest, TestLossWeightCPU) {
+  Caffe::set_mode(Caffe::CPU);
+  // First, compute the loss and gradients with no top_loss_weight specified.
+  // In this case, the loss weight for the EUCLIDEAN_LOSS layer should default
+  // to 1.
+  vector<Blob<TypeParam>*> bottom;
+  Caffe::set_random_seed(this->seed_);
+  const bool kForceBackward = true;
+  this->InitUnsharedWeightsNet(NULL, kForceBackward);
+  const TypeParam loss = this->net_->ForwardBackward(bottom);
+  const bool kCopyDiff = true;
+  const bool kReshape = true;
+  const vector<shared_ptr<Blob<TypeParam> > >& net_blobs = this->net_->blobs();
+  vector<shared_ptr<Blob<TypeParam> > > blob_grads(net_blobs.size());
+  for (int i = 0; i < net_blobs.size(); ++i) {
+    blob_grads[i].reset(new Blob<TypeParam>());
+    blob_grads[i]->CopyFrom(*net_blobs[i], kCopyDiff, kReshape);
+  }
+  const vector<shared_ptr<Blob<TypeParam> > >& net_params =
+      this->net_->params();
+  vector<shared_ptr<Blob<TypeParam> > > param_grads(net_params.size());
+  for (int i = 0; i < net_params.size(); ++i) {
+    param_grads[i].reset(new Blob<TypeParam>());
+    param_grads[i]->CopyFrom(*net_params[i], kCopyDiff, kReshape);
+  }
+  // Check that the loss is non-trivial, otherwise the test doesn't prove much.
+  const TypeParam kMinLossAbsValue = 1e-2;
+  ASSERT_GE(fabs(loss), kMinLossAbsValue);
+  const TypeParam kErrorMargin = 1e-5;
+  const int kNumLossWeights = 6;
+  TypeParam kLossWeights[kNumLossWeights] = {2, 0, 1, -1, -2.5, 3.7};
+  for (int i = 0; i < kNumLossWeights; ++i) {
+    Caffe::set_random_seed(this->seed_);
+    this->InitUnsharedWeightsNet(&kLossWeights[i], kForceBackward);
+    const TypeParam weighted_loss = this->net_->ForwardBackward(bottom);
+    const TypeParam error_margin = kErrorMargin * fabs(kLossWeights[i]);
+    EXPECT_NEAR(loss * kLossWeights[i], weighted_loss, error_margin)
+        << "loss weight = " << kLossWeights[i];
+    const vector<shared_ptr<Blob<TypeParam> > >& weighted_blobs =
+        this->net_->blobs();
+    ASSERT_EQ(blob_grads.size(), weighted_blobs.size());
+    for (int j = 0; j < blob_grads.size(); ++j) {
+      ASSERT_EQ(blob_grads[j]->count(), weighted_blobs[j]->count());
+      for (int k = 0; k < blob_grads[j]->count(); ++k) {
+        EXPECT_NEAR(blob_grads[j]->cpu_diff()[k] * kLossWeights[i],
+            weighted_blobs[j]->cpu_diff()[k], error_margin);
+      }
+    }
+    const vector<shared_ptr<Blob<TypeParam> > >& weighted_params =
+        this->net_->params();
+    ASSERT_EQ(param_grads.size(), weighted_params.size());
+    for (int j = 0; j < param_grads.size(); ++j) {
+      ASSERT_EQ(param_grads[j]->count(), weighted_params[j]->count());
+      for (int k = 0; k < param_grads[j]->count(); ++k) {
+        EXPECT_NEAR(param_grads[j]->cpu_diff()[k] * kLossWeights[i],
+            weighted_params[j]->cpu_diff()[k], error_margin);
+      }
+    }
+  }
+}
+
+TYPED_TEST(NetTest, TestLossWeightGPU) {
+  Caffe::set_mode(Caffe::GPU);
+  // First, compute the loss and gradients with no top_loss_weight specified.
+  // In this case, the loss weight for the EUCLIDEAN_LOSS layer should default
+  // to 1.
+  vector<Blob<TypeParam>*> bottom;
+  Caffe::set_random_seed(this->seed_);
+  const bool kForceBackward = true;
+  this->InitUnsharedWeightsNet(NULL, kForceBackward);
+  const TypeParam loss = this->net_->ForwardBackward(bottom);
+  const bool kCopyDiff = true;
+  const bool kReshape = true;
+  const vector<shared_ptr<Blob<TypeParam> > >& net_blobs = this->net_->blobs();
+  vector<shared_ptr<Blob<TypeParam> > > blob_grads(net_blobs.size());
+  for (int i = 0; i < net_blobs.size(); ++i) {
+    blob_grads[i].reset(new Blob<TypeParam>());
+    blob_grads[i]->CopyFrom(*net_blobs[i], kCopyDiff, kReshape);
+  }
+  const vector<shared_ptr<Blob<TypeParam> > >& net_params =
+      this->net_->params();
+  vector<shared_ptr<Blob<TypeParam> > > param_grads(net_params.size());
+  for (int i = 0; i < net_params.size(); ++i) {
+    param_grads[i].reset(new Blob<TypeParam>());
+    param_grads[i]->CopyFrom(*net_params[i], kCopyDiff, kReshape);
+  }
+  // Check that the loss is non-trivial, otherwise the test doesn't prove much.
+  const TypeParam kMinLossAbsValue = 1e-2;
+  ASSERT_GE(fabs(loss), kMinLossAbsValue);
+  const TypeParam kErrorMargin = 1e-4;
+  const int kNumLossWeights = 6;
+  TypeParam kLossWeights[kNumLossWeights] = {2, 0, 1, -1, -2.5, 3.7};
+  for (int i = 0; i < kNumLossWeights; ++i) {
+    Caffe::set_random_seed(this->seed_);
+    this->InitUnsharedWeightsNet(&kLossWeights[i], kForceBackward);
+    const TypeParam weighted_loss = this->net_->ForwardBackward(bottom);
+    const TypeParam error_margin = kErrorMargin * fabs(kLossWeights[i]);
+    EXPECT_NEAR(loss * kLossWeights[i], weighted_loss, error_margin)
+        << "loss weight = " << kLossWeights[i];
+    const vector<shared_ptr<Blob<TypeParam> > >& weighted_blobs =
+        this->net_->blobs();
+    ASSERT_EQ(blob_grads.size(), weighted_blobs.size());
+    for (int j = 0; j < blob_grads.size(); ++j) {
+      ASSERT_EQ(blob_grads[j]->count(), weighted_blobs[j]->count());
+      for (int k = 0; k < blob_grads[j]->count(); ++k) {
+        EXPECT_NEAR(blob_grads[j]->cpu_diff()[k] * kLossWeights[i],
+            weighted_blobs[j]->cpu_diff()[k], error_margin);
+      }
+    }
+    const vector<shared_ptr<Blob<TypeParam> > >& weighted_params =
+        this->net_->params();
+    ASSERT_EQ(param_grads.size(), weighted_params.size());
+    for (int j = 0; j < param_grads.size(); ++j) {
+      ASSERT_EQ(param_grads[j]->count(), weighted_params[j]->count());
+      for (int k = 0; k < param_grads[j]->count(); ++k) {
+        EXPECT_NEAR(param_grads[j]->cpu_diff()[k] * kLossWeights[i],
+            weighted_params[j]->cpu_diff()[k], error_margin);
+      }
+    }
+  }
+}
+
 TYPED_TEST(NetTest, TestUnsharedWeightsDataNet) {
   typedef typename TypeParam::Dtype Dtype;
   this->InitUnsharedWeightsNet();
@@ -722,12 +860,14 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
   typedef typename TypeParam::Dtype Dtype;
   vector<Blob<Dtype>*> bottom;
   const bool kBiasTerm = true;
+  const bool kForceBackward = false;
+  Dtype* kLossWeight = NULL;
   // Run the net with all params learned; check that gradients are non-zero.
   Caffe::set_random_seed(this->seed_);
   Dtype blobs_lr_w1 = 1, blobs_lr_w2 = 1, blobs_lr_b1 = 2, blobs_lr_b2 = 2;
-  this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
-      blobs_lr_b1, blobs_lr_b2);
+  this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+      blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
   this->net_->Forward(bottom);
   this->net_->Backward();
   const vector<shared_ptr<Blob<Dtype> > >& params = this->net_->params();
@@ -746,8 +886,8 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
   // gradients.
   Caffe::set_random_seed(this->seed_);
   blobs_lr_w1 *= 2, blobs_lr_w2 *= 2, blobs_lr_b1 *= 2, blobs_lr_b2 *= 2;
-  this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
-      blobs_lr_b1, blobs_lr_b2);
+  this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+      blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
   this->net_->Forward(bottom);
   this->net_->Backward();
   const vector<shared_ptr<Blob<Dtype> > >& params2 = this->net_->params();
@@ -762,8 +902,8 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
   // gradients for those.
   Caffe::set_random_seed(this->seed_);
   blobs_lr_w1 = 1, blobs_lr_w2 = 0, blobs_lr_b1 = 0, blobs_lr_b2 = 1;
-  this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
-      blobs_lr_b1, blobs_lr_b2);
+  this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+      blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
   this->net_->Forward(bottom);
   this->net_->Backward();
   const vector<shared_ptr<Blob<Dtype> > >& params3 = this->net_->params();
@@ -781,8 +921,8 @@ TYPED_TEST(NetTest, TestParamPropagateDown) {
   // Change the opposite subset of the learning rates to zero.
   Caffe::set_random_seed(this->seed_);
   blobs_lr_w1 = 0, blobs_lr_w2 = 1, blobs_lr_b1 = 1, blobs_lr_b2 = 0;
-  this->InitUnsharedWeightsNet(kBiasTerm, blobs_lr_w1, blobs_lr_w2,
-      blobs_lr_b1, blobs_lr_b2);
+  this->InitUnsharedWeightsNet(kLossWeight, kForceBackward, kBiasTerm,
+      blobs_lr_w1, blobs_lr_w2, blobs_lr_b1, blobs_lr_b2);
   this->net_->Forward(bottom);
   this->net_->Backward();
   const vector<shared_ptr<Blob<Dtype> > >& params4 = this->net_->params();