author     Ronghang Hu <huronghang@hotmail.com>  2015-08-11 21:38:06 -0700
committer  Ronghang Hu <huronghang@hotmail.com>  2015-08-12 10:51:45 -0700
commit     0d34d5ba0fbdc09ac8f372cb581ccaec599f10bc (patch)
tree       1d6aa3258483de57074730ba7e55e1fb5870e793 /include
parent     8771d0f4317fc0081d86b7637f5f5ceef5b92dfb (diff)
Data Layers Parallel for Multi-GPU
Allow data layers (and PythonLayer when used as a data layer) to be shared among worker solvers' training nets, and among test nets as well, which future-proofs the design in case one wants to do multi-GPU testing. Shared data layers are locked during forward so that each solver reads data sequentially.
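In outline, the mechanism is a flag plus a lock. Below is a standalone C++ sketch of the pattern this commit introduces (toy names, not the real Caffe classes):

    #include <boost/thread.hpp>

    // Toy base class illustrating the two pieces added to Layer in this
    // commit: an opt-in flag for sharing, and a mutex that serializes
    // forward passes on a shared instance.
    class ToyLayer {
     public:
      virtual ~ToyLayer() {}
      // By default a layer is not shared; data layers override this to true.
      virtual bool ShareInParallel() const { return false; }
      void Forward() {
        // Lock so solvers sharing this layer run forward one at a time.
        boost::mutex::scoped_lock lock(forward_mutex_);
        ForwardImpl();
      }
     protected:
      virtual void ForwardImpl() {}
     private:
      boost::mutex forward_mutex_;
    };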
Diffstat (limited to 'include')
-rw-r--r--  include/caffe/data_layers.hpp   11
-rw-r--r--  include/caffe/layer.hpp         15
-rw-r--r--  include/caffe/net.hpp            8
-rw-r--r--  include/caffe/python_layer.hpp   4
-rw-r--r--  include/caffe/solver.hpp        14
5 files changed, 44 insertions, 8 deletions
diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp
index 12e6c366..552d8141 100644
--- a/include/caffe/data_layers.hpp
+++ b/include/caffe/data_layers.hpp
@@ -34,6 +34,8 @@ class BaseDataLayer : public Layer<Dtype> {
// This method may not be overridden except by the BasePrefetchingDataLayer.
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
+ // Data layers should be shared by multiple solvers in parallel
+ virtual inline bool ShareInParallel() const { return true; }
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
// Data layers have no bottoms, so reshaping is trivial.
@@ -94,7 +96,8 @@ class DataLayer : public BasePrefetchingDataLayer<Dtype> {
virtual ~DataLayer();
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
-
+ // DataLayer shares data through its DataReader instead, so the layer itself is not shared
+ virtual inline bool ShareInParallel() const { return false; }
virtual inline const char* type() const { return "Data"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int MinTopBlobs() const { return 1; }
@@ -118,6 +121,8 @@ class DummyDataLayer : public Layer<Dtype> {
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
+ // Data layers should be shared by multiple solvers in parallel
+ virtual inline bool ShareInParallel() const { return true; }
// Data layers have no bottoms, so reshaping is trivial.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
@@ -151,6 +156,8 @@ class HDF5DataLayer : public Layer<Dtype> {
virtual ~HDF5DataLayer();
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
+ // Data layers should be shared by multiple solvers in parallel
+ virtual inline bool ShareInParallel() const { return true; }
// Data layers have no bottoms, so reshaping is trivial.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
@@ -192,6 +199,8 @@ class HDF5OutputLayer : public Layer<Dtype> {
virtual ~HDF5OutputLayer();
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
+ // Data layers should be shared by multiple solvers in parallel
+ virtual inline bool ShareInParallel() const { return true; }
// Data layers have no bottoms, so reshaping is trivial.
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
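The hunks above all follow one pattern: BaseDataLayer, DummyDataLayer, HDF5DataLayer, and HDF5OutputLayer opt in to sharing, while DataLayer opts out because its DataReader already serializes access to the data source. A toy version of the same override pattern (illustrative classes, not the real hierarchy):

    // Base data layer: shared across worker solvers by default.
    class ToyBaseDataLayer {
     public:
      virtual ~ToyBaseDataLayer() {}
      virtual bool ShareInParallel() const { return true; }
    };

    // DB-backed data layer: the reader underneath is what gets shared,
    // so each worker keeps its own layer object and opts out here.
    class ToyDBDataLayer : public ToyBaseDataLayer {
     public:
      virtual bool ShareInParallel() const { return false; }
    };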
diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp
index 0771b6a8..d82197a9 100644
--- a/include/caffe/layer.hpp
+++ b/include/caffe/layer.hpp
@@ -1,6 +1,7 @@
#ifndef CAFFE_LAYER_H_
#define CAFFE_LAYER_H_
+#include <boost/thread.hpp>
#include <algorithm>
#include <string>
#include <vector>
@@ -86,6 +87,14 @@ class Layer {
const vector<Blob<Dtype>*>& top) {}
/**
+ * @brief Whether a layer should be shared by multiple nets during data
+ * parallelism. By default, all layers except data layers should
+ * not be shared. Data layers should be shared so that each worker
+ * solver accesses data sequentially during data parallelism.
+ */
+ virtual inline bool ShareInParallel() const { return false; }
+
+ /**
* @brief Adjust the shapes of top blobs and internal buffers to accommodate
* the shapes of the bottom blobs.
*
@@ -396,6 +405,10 @@ class Layer {
}
}
+ private:
+ // Mutex to lock this layer so that forward passes run sequentially
+ boost::mutex forward_mutex_;
+
DISABLE_COPY_AND_ASSIGN(Layer);
}; // class Layer
@@ -405,6 +418,8 @@ class Layer {
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
+ // Lock during forward to serialize forward passes on shared layers
+ boost::mutex::scoped_lock lock(forward_mutex_);
Dtype loss = 0;
Reshape(bottom, top);
switch (Caffe::mode()) {
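A runnable toy demonstrating the effect of the scoped lock added above, assuming only Boost.Thread (names are illustrative, not Caffe's):

    #include <boost/thread.hpp>
    #include <cstdio>

    // Two threads calling Forward() on one shared object are serialized by
    // the per-layer mutex, so each data item is consumed exactly once.
    class ToySharedLayer {
     public:
      ToySharedLayer() : next_item_(0) {}
      void Forward() {
        boost::mutex::scoped_lock lock(forward_mutex_);   // one solver at a time
        std::printf("consumed item %d\n", next_item_++);  // sequential access
      }
     private:
      boost::mutex forward_mutex_;
      int next_item_;
    };

    int main() {
      ToySharedLayer shared;  // imagine two worker solvers sharing this layer
      boost::thread a(&ToySharedLayer::Forward, &shared);
      boost::thread b(&ToySharedLayer::Forward, &shared);
      a.join();
      b.join();
      return 0;
    }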
diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index bf997553..1bf07d28 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -23,8 +23,9 @@ namespace caffe {
template <typename Dtype>
class Net {
public:
- explicit Net(const NetParameter& param);
- explicit Net(const string& param_file, Phase phase);
+ explicit Net(const NetParameter& param, const Net* root_net = NULL);
+ explicit Net(const string& param_file, Phase phase,
+ const Net* root_net = NULL);
virtual ~Net() {}
/// @brief Initialize a network with a NetParameter.
@@ -291,7 +292,8 @@ class Net {
size_t memory_used_;
/// Whether to compute and display debug info for the net.
bool debug_info_;
-
+ /// The root net that actually holds the shared layers in data parallelism
+ const Net* const root_net_;
DISABLE_COPY_AND_ASSIGN(Net);
};
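A sketch of what the new root_net_ pointer is for, under the assumption (consistent with the commit message) that worker nets borrow shared layers from the root net. The helper below is hypothetical, not the real Net::Init logic:

    #include <boost/shared_ptr.hpp>
    #include <cstddef>

    struct ToyLayer {
      virtual ~ToyLayer() {}
      virtual bool ShareInParallel() const { return true; }
    };

    struct ToyNet {
      boost::shared_ptr<ToyLayer> layer;  // pretend this net has one layer
    };

    // Worker nets reuse the root net's layer when it opts into sharing;
    // otherwise each worker constructs a private copy.
    boost::shared_ptr<ToyLayer> LayerForWorker(const ToyNet* root_net) {
      if (root_net != NULL && root_net->layer->ShareInParallel()) {
        return root_net->layer;                             // shared instance
      }
      return boost::shared_ptr<ToyLayer>(new ToyLayer());   // private copy
    }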
diff --git a/include/caffe/python_layer.hpp b/include/caffe/python_layer.hpp
index 2957e742..c43c1e8a 100644
--- a/include/caffe/python_layer.hpp
+++ b/include/caffe/python_layer.hpp
@@ -27,6 +27,10 @@ class PythonLayer : public Layer<Dtype> {
self_.attr("reshape")(bottom, top);
}
+ virtual inline bool ShareInParallel() const {
+ return this->layer_param_.python_param().share_in_parallel();
+ }
+
virtual inline const char* type() const { return "Python"; }
protected:
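Since PythonLayer reads the flag from its layer parameter, sharing can be enabled per layer in the net definition. A hypothetical prototxt snippet (module and layer names are made up for illustration):

    layer {
      name: "data"
      type: "Python"
      python_param {
        module: "my_data_module"   # hypothetical Python module
        layer: "MyDataLayer"       # hypothetical layer class
        share_in_parallel: true    # the field read by ShareInParallel()
      }
    }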
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 89a6c76d..f583324a 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -17,8 +17,9 @@ namespace caffe {
template <typename Dtype>
class Solver {
public:
- explicit Solver(const SolverParameter& param);
- explicit Solver(const string& param_file);
+ explicit Solver(const SolverParameter& param,
+ const Solver* root_solver = NULL);
+ explicit Solver(const string& param_file, const Solver* root_solver = NULL);
void Init(const SolverParameter& param);
void InitTrainNet();
void InitTestNets();
@@ -79,6 +80,10 @@ class Solver {
vector<shared_ptr<Net<Dtype> > > test_nets_;
vector<Callback*> callbacks_;
+ // The root solver that holds root nets (actually containing shared layers)
+ // in data parallelism
+ const Solver* const root_solver_;
+
DISABLE_COPY_AND_ASSIGN(Solver);
};
@@ -89,8 +94,9 @@ class Solver {
template <typename Dtype>
class WorkerSolver : public Solver<Dtype> {
public:
- explicit WorkerSolver(const SolverParameter& param)
- : Solver<Dtype>(param) {}
+ explicit WorkerSolver(const SolverParameter& param,
+ const Solver<Dtype>* root_solver = NULL)
+ : Solver<Dtype>(param, root_solver) {}
protected:
void ApplyUpdate() {}
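Finally, a toy sketch of how the root/worker plumbing above fits together (hypothetical names; the real wiring lives in the Solver and parallel training code):

    #include <cstddef>

    // Root/worker solvers mirroring the constructor changes in solver.hpp.
    class ToySolver {
     public:
      explicit ToySolver(const ToySolver* root_solver = NULL)
          : root_solver_(root_solver) {}
      virtual ~ToySolver() {}
      bool IsRoot() const { return root_solver_ == NULL; }
     protected:
      // Root solver holding the root nets, which own the shared layers.
      const ToySolver* const root_solver_;
    };

    class ToyWorkerSolver : public ToySolver {
     public:
      explicit ToyWorkerSolver(const ToySolver* root_solver)
          : ToySolver(root_solver) {}
    };

    // Usage: one root, plus one worker per extra GPU, all pointing at it.
    //   ToySolver root;
    //   ToyWorkerSolver worker(&root);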