author     Ronghang Hu <huronghang@hotmail.com>  2015-08-11 21:38:06 -0700
committer  Ronghang Hu <huronghang@hotmail.com>  2015-08-12 10:51:45 -0700
commit     0d34d5ba0fbdc09ac8f372cb581ccaec599f10bc (patch)
tree       1d6aa3258483de57074730ba7e55e1fb5870e793 /include
parent     8771d0f4317fc0081d86b7637f5f5ceef5b92dfb (diff)
Data Layers Parallel for Multi-GPU
Allow data layers (and PythonLayer when used as a data layer) to be shared among worker solvers' training nets, and among test nets as well, which future-proofs the design in case one wants to do multi-GPU testing. Shared data layers are locked during forward so that each solver reads data sequentially.
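In outline, the mechanism is a flag plus a lock. Below is a standalone C++ sketch of the pattern this commit introduces (toy names, not the real Caffe classes):

    #include <boost/thread.hpp>

    // Toy base class illustrating the two pieces added to Layer in this
    // commit: an opt-in flag for sharing, and a mutex that serializes
    // forward passes on a shared instance.
    class ToyLayer {
     public:
      virtual ~ToyLayer() {}
      // By default a layer is not shared; data layers override this to true.
      virtual bool ShareInParallel() const { return false; }
      void Forward() {
        // Lock so solvers sharing this layer run forward one at a time.
        boost::mutex::scoped_lock lock(forward_mutex_);
        ForwardImpl();
      }
     protected:
      virtual void ForwardImpl() {}
     private:
      boost::mutex forward_mutex_;
    };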
Diffstat (limited to 'include')
-rw-r--r--  include/caffe/data_layers.hpp   11
-rw-r--r--  include/caffe/layer.hpp         15
-rw-r--r--  include/caffe/net.hpp            8
-rw-r--r--  include/caffe/python_layer.hpp   4
-rw-r--r--  include/caffe/solver.hpp        14
5 files changed, 44 insertions, 8 deletions
diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp
index 12e6c366..552d8141 100644
--- a/include/caffe/data_layers.hpp
+++ b/include/caffe/data_layers.hpp
@@ -34,6 +34,8 @@ class BaseDataLayer : public Layer<Dtype> {
// This method may not be overridden except by the BasePrefetchingDataLayer.
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
+ // Data layers should be shared by multiple solvers in parallel
+ virtual inline bool ShareInParallel() const { return true; }
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
// Data layers have no bottoms, so reshaping is trivial.
@@ -94,7 +96,8 @@ class DataLayer : public BasePrefetchingDataLayer<Dtype> {
virtual ~DataLayer();
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
-
+ // DataLayer shares data through its DataReader instead, so the layer itself is not shared
+ virtual inline bool ShareInParallel() const { return false; }
virtual inline const char* type() const { return "Data"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int MinTopBlobs() const { return 1; }
@@ -118,6 +121,8 @@ class DummyDataLayer : public Layer<Dtype> {
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
+ // Data layers should be shared by multiple solvers in parallel
+ virtual inline bool ShareInParallel() const { return true; }
// Data layers have no bottoms, so reshaping is trivial.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
@@ -151,6 +156,8 @@ class HDF5DataLayer : public Layer<Dtype> {
virtual ~HDF5DataLayer();
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
+ // Data layers should be shared by multiple solvers in parallel
+ virtual inline bool ShareInParallel() const { return true; }
// Data layers have no bottoms, so reshaping is trivial.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
@@ -192,6 +199,8 @@ class HDF5OutputLayer : public Layer<Dtype> {
virtual ~HDF5OutputLayer();
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
+ // Data layers should be shared by multiple solvers in parallel
+ virtual inline bool ShareInParallel() const { return true; }
// Data layers have no bottoms, so reshaping is trivial.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
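The hunks above all follow one pattern: BaseDataLayer, DummyDataLayer, HDF5DataLayer, and HDF5OutputLayer opt in to sharing, while DataLayer opts out because its DataReader already serializes access to the data source. A toy version of the same override pattern (illustrative classes, not the real hierarchy):

    // Base data layer: shared across worker solvers by default.
    class ToyBaseDataLayer {
     public:
      virtual ~ToyBaseDataLayer() {}
      virtual bool ShareInParallel() const { return true; }
    };

    // DB-backed data layer: the reader underneath is what gets shared,
    // so each worker keeps its own layer object and opts out here.
    class ToyDBDataLayer : public ToyBaseDataLayer {
     public:
      virtual bool ShareInParallel() const { return false; }
    };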
diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp
index 0771b6a8..d82197a9 100644
--- a/include/caffe/layer.hpp
+++ b/include/caffe/layer.hpp
@@ -1,6 +1,7 @@
#ifndef CAFFE_LAYER_H_
#define CAFFE_LAYER_H_
+#include <boost/thread.hpp>
#include <algorithm>
#include <string>
#include <vector>
@@ -86,6 +87,14 @@ class Layer {
const vector<Blob<Dtype>*>& top) {}
/**
+ * @brief Whether a layer should be shared by multiple nets during data
+ * parallelism. By default, all layers except data layers should
+ * not be shared. Data layers should be shared so that each worker
+ * solver accesses data sequentially during data parallelism.
+ */
+ virtual inline bool ShareInParallel() const { return false; }
+
+ /**
* @brief Adjust the shapes of top blobs and internal buffers to accommodate
* the shapes of the bottom blobs.
*
@@ -396,6 +405,10 @@ class Layer {
}
}
+ private:
+ // Mutex to lock this layer so that forward passes run sequentially
+ boost::mutex forward_mutex_;
+
DISABLE_COPY_AND_ASSIGN(Layer);
}; // class Layer
@@ -405,6 +418,8 @@ class Layer {
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
+ // Lock during forward to serialize forward passes on shared layers
+ boost::mutex::scoped_lock lock(forward_mutex_);
Dtype loss = 0;
Reshape(bottom, top);
switch (Caffe::mode()) {
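A runnable toy demonstrating the effect of the scoped lock added above, assuming only Boost.Thread (names are illustrative, not Caffe's):

    #include <boost/thread.hpp>
    #include <cstdio>

    // Two threads calling Forward() on one shared object are serialized by
    // the per-layer mutex, so each data item is consumed exactly once.
    class ToySharedLayer {
     public:
      ToySharedLayer() : next_item_(0) {}
      void Forward() {
        boost::mutex::scoped_lock lock(forward_mutex_);   // one solver at a time
        std::printf("consumed item %d\n", next_item_++);  // sequential access
      }
     private:
      boost::mutex forward_mutex_;
      int next_item_;
    };

    int main() {
      ToySharedLayer shared;  // imagine two worker solvers sharing this layer
      boost::thread a(&ToySharedLayer::Forward, &shared);
      boost::thread b(&ToySharedLayer::Forward, &shared);
      a.join();
      b.join();
      return 0;
    }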
diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index bf997553..1bf07d28 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -23,8 +23,9 @@ namespace caffe {
template <typename Dtype>
class Net {
public:
- explicit Net(const NetParameter& param);
- explicit Net(const string& param_file, Phase phase);
+ explicit Net(const NetParameter& param, const Net* root_net = NULL);
+ explicit Net(const string& param_file, Phase phase,
+ const Net* root_net = NULL);
virtual ~Net() {}
/// @brief Initialize a network with a NetParameter.
@@ -291,7 +292,8 @@ class Net {
size_t memory_used_;
/// Whether to compute and display debug info for the net.
bool debug_info_;
-
+ /// The root net that actually holds the shared layers in data parallelism
+ const Net* const root_net_;
DISABLE_COPY_AND_ASSIGN(Net);
};
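A sketch of what the new root_net_ pointer is for, under the assumption (consistent with the commit message) that worker nets borrow shared layers from the root net. The helper below is hypothetical, not the real Net::Init logic:

    #include <boost/shared_ptr.hpp>
    #include <cstddef>

    struct ToyLayer {
      virtual ~ToyLayer() {}
      virtual bool ShareInParallel() const { return true; }
    };

    struct ToyNet {
      boost::shared_ptr<ToyLayer> layer;  // pretend this net has one layer
    };

    // Worker nets reuse the root net's layer when it opts into sharing;
    // otherwise each worker constructs a private copy.
    boost::shared_ptr<ToyLayer> LayerForWorker(const ToyNet* root_net) {
      if (root_net != NULL && root_net->layer->ShareInParallel()) {
        return root_net->layer;                             // shared instance
      }
      return boost::shared_ptr<ToyLayer>(new ToyLayer());   // private copy
    }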
diff --git a/include/caffe/python_layer.hpp b/include/caffe/python_layer.hpp
index 2957e742..c43c1e8a 100644
--- a/include/caffe/python_layer.hpp
+++ b/include/caffe/python_layer.hpp
@@ -27,6 +27,10 @@ class PythonLayer : public Layer<Dtype> {
self_.attr("reshape")(bottom, top);
}
+ virtual inline bool ShareInParallel() const {
+ return this->layer_param_.python_param().share_in_parallel();
+ }
+
virtual inline const char* type() const { return "Python"; }
protected:
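Since PythonLayer reads the flag from its layer parameter, sharing can be enabled per layer in the net definition. A hypothetical prototxt snippet (module and layer names are made up for illustration):

    layer {
      name: "data"
      type: "Python"
      python_param {
        module: "my_data_module"   # hypothetical Python module
        layer: "MyDataLayer"       # hypothetical layer class
        share_in_parallel: true    # the field read by ShareInParallel()
      }
    }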
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 89a6c76d..f583324a 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -17,8 +17,9 @@ namespace caffe {
template <typename Dtype>
class Solver {
public:
- explicit Solver(const SolverParameter& param);
- explicit Solver(const string& param_file);
+ explicit Solver(const SolverParameter& param,
+ const Solver* root_solver = NULL);
+ explicit Solver(const string& param_file, const Solver* root_solver = NULL);
void Init(const SolverParameter& param);
void InitTrainNet();
void InitTestNets();
@@ -79,6 +80,10 @@ class Solver {
vector<shared_ptr<Net<Dtype> > > test_nets_;
vector<Callback*> callbacks_;
+ // The root solver that holds root nets (actually containing shared layers)
+ // in data parallelism
+ const Solver* const root_solver_;
+
DISABLE_COPY_AND_ASSIGN(Solver);
};
@@ -89,8 +94,9 @@ class Solver {
template <typename Dtype>
class WorkerSolver : public Solver<Dtype> {
public:
- explicit WorkerSolver(const SolverParameter& param)
- : Solver<Dtype>(param) {}
+ explicit WorkerSolver(const SolverParameter& param,
+ const Solver<Dtype>* root_solver = NULL)
+ : Solver<Dtype>(param, root_solver) {}
protected:
void ApplyUpdate() {}
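Finally, a toy sketch of how the root/worker plumbing above fits together (hypothetical names; the real wiring lives in the Solver and parallel training code):

    #include <cstddef>

    // Root/worker solvers mirroring the constructor changes in solver.hpp.
    class ToySolver {
     public:
      explicit ToySolver(const ToySolver* root_solver = NULL)
          : root_solver_(root_solver) {}
      virtual ~ToySolver() {}
      bool IsRoot() const { return root_solver_ == NULL; }
     protected:
      // Root solver holding the root nets, which own the shared layers.
      const ToySolver* const root_solver_;
    };

    class ToyWorkerSolver : public ToySolver {
     public:
      explicit ToyWorkerSolver(const ToySolver* root_solver)
          : ToySolver(root_solver) {}
    };

    // Usage: one root, plus one worker per extra GPU, all pointing at it.
    //   ToySolver root;
    //   ToyWorkerSolver worker(&root);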