diff options
author | Ronghang Hu <huronghang@hotmail.com> | 2015-08-11 21:38:06 -0700 |
---|---|---|
committer | Ronghang Hu <huronghang@hotmail.com> | 2015-08-12 10:51:45 -0700 |
commit | 0d34d5ba0fbdc09ac8f372cb581ccaec599f10bc (patch) | |
tree | 1d6aa3258483de57074730ba7e55e1fb5870e793 /include | |
parent | 8771d0f4317fc0081d86b7637f5f5ceef5b92dfb (diff) | |
download | caffeonacl-0d34d5ba0fbdc09ac8f372cb581ccaec599f10bc.tar.gz caffeonacl-0d34d5ba0fbdc09ac8f372cb581ccaec599f10bc.tar.bz2 caffeonacl-0d34d5ba0fbdc09ac8f372cb581ccaec599f10bc.zip |
Data Layers Parallel for Multi-GPU
Allow data layers (and also PythonLayer when used as a data layer) to be shared
among the worker solvers' training nets, and also among their test nets to be
future-proof in case one wants to do Multi-GPU testing. Data layers are locked
during the forward pass to ensure that forward passes run sequentially.
Diffstat (limited to 'include')
-rw-r--r-- | include/caffe/data_layers.hpp | 11 | ||||
-rw-r--r-- | include/caffe/layer.hpp | 15 | ||||
-rw-r--r-- | include/caffe/net.hpp | 8 | ||||
-rw-r--r-- | include/caffe/python_layer.hpp | 4 | ||||
-rw-r--r-- | include/caffe/solver.hpp | 14 |
5 files changed, 44 insertions, 8 deletions
diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp index 12e6c366..552d8141 100644 --- a/include/caffe/data_layers.hpp +++ b/include/caffe/data_layers.hpp @@ -34,6 +34,8 @@ class BaseDataLayer : public Layer<Dtype> { // This method may not be overridden except by the BasePrefetchingDataLayer. virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {} // Data layers have no bottoms, so reshaping is trivial. @@ -94,7 +96,8 @@ class DataLayer : public BasePrefetchingDataLayer<Dtype> { virtual ~DataLayer(); virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); - + // DataLayer uses DataReader instead for sharing for parallelism + virtual inline bool ShareInParallel() const { return false; } virtual inline const char* type() const { return "Data"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int MinTopBlobs() const { return 1; } @@ -118,6 +121,8 @@ class DummyDataLayer : public Layer<Dtype> { : Layer<Dtype>(param) {} virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. 
virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {} @@ -151,6 +156,8 @@ class HDF5DataLayer : public Layer<Dtype> { virtual ~HDF5DataLayer(); virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {} @@ -192,6 +199,8 @@ class HDF5OutputLayer : public Layer<Dtype> { virtual ~HDF5OutputLayer(); virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); + // Data layers should be shared by multiple solvers in parallel + virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {} diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index 0771b6a8..d82197a9 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -1,6 +1,7 @@ #ifndef CAFFE_LAYER_H_ #define CAFFE_LAYER_H_ +#include <boost/thread.hpp> #include <algorithm> #include <string> #include <vector> @@ -86,6 +87,14 @@ class Layer { const vector<Blob<Dtype>*>& top) {} /** + * @brief Whether a layer should be shared by multiple nets during data + * parallelism. By default, all layers except for data layers should + * not be shared. data layers should be shared to ensure each worker + * solver access data sequentially during data parallelism. + */ + virtual inline bool ShareInParallel() const { return false; } + + /** * @brief Adjust the shapes of top blobs and internal buffers to accommodate * the shapes of the bottom blobs. 
* @@ -396,6 +405,10 @@ class Layer { } } + private: + // mutex to lock layer to ensure sequential forward + boost::mutex forward_mutex_; + DISABLE_COPY_AND_ASSIGN(Layer); }; // class Layer @@ -405,6 +418,8 @@ class Layer { template <typename Dtype> inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { + // Lock during forward to ensure sequential forward + boost::mutex::scoped_lock lock(forward_mutex_); Dtype loss = 0; Reshape(bottom, top); switch (Caffe::mode()) { diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index bf997553..1bf07d28 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -23,8 +23,9 @@ namespace caffe { template <typename Dtype> class Net { public: - explicit Net(const NetParameter& param); - explicit Net(const string& param_file, Phase phase); + explicit Net(const NetParameter& param, const Net* root_net = NULL); + explicit Net(const string& param_file, Phase phase, + const Net* root_net = NULL); virtual ~Net() {} /// @brief Initialize a network with a NetParameter. @@ -291,7 +292,8 @@ class Net { size_t memory_used_; /// Whether to compute and display debug info for the net. 
bool debug_info_; - + /// The root net that actually holds the shared layers in data parallelism + const Net* const root_net_; DISABLE_COPY_AND_ASSIGN(Net); }; diff --git a/include/caffe/python_layer.hpp b/include/caffe/python_layer.hpp index 2957e742..c43c1e8a 100644 --- a/include/caffe/python_layer.hpp +++ b/include/caffe/python_layer.hpp @@ -27,6 +27,10 @@ class PythonLayer : public Layer<Dtype> { self_.attr("reshape")(bottom, top); } + virtual inline bool ShareInParallel() const { + return this->layer_param_.python_param().share_in_parallel(); + } + virtual inline const char* type() const { return "Python"; } protected: diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 89a6c76d..f583324a 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -17,8 +17,9 @@ namespace caffe { template <typename Dtype> class Solver { public: - explicit Solver(const SolverParameter& param); - explicit Solver(const string& param_file); + explicit Solver(const SolverParameter& param, + const Solver* root_solver = NULL); + explicit Solver(const string& param_file, const Solver* root_solver = NULL); void Init(const SolverParameter& param); void InitTrainNet(); void InitTestNets(); @@ -79,6 +80,10 @@ class Solver { vector<shared_ptr<Net<Dtype> > > test_nets_; vector<Callback*> callbacks_; + // The root solver that holds root nets (actually containing shared layers) + // in data parallelism + const Solver* const root_solver_; + DISABLE_COPY_AND_ASSIGN(Solver); }; @@ -89,8 +94,9 @@ class Solver { template <typename Dtype> class WorkerSolver : public Solver<Dtype> { public: - explicit WorkerSolver(const SolverParameter& param) - : Solver<Dtype>(param) {} + explicit WorkerSolver(const SolverParameter& param, + const Solver<Dtype>* root_solver = NULL) + : Solver<Dtype>(param, root_solver) {} protected: void ApplyUpdate() {} |