Diffstat (limited to 'include')
-rw-r--r--  include/caffe/common.hpp         |  2
-rw-r--r--  include/caffe/common_layers.hpp  | 39
-rw-r--r--  include/caffe/data_layers.hpp    |  3
-rw-r--r--  include/caffe/filler.hpp         | 71
-rw-r--r--  include/caffe/layer.hpp          |  1
-rw-r--r--  include/caffe/loss_layers.hpp    |  2
-rw-r--r--  include/caffe/net.hpp            |  3
-rw-r--r--  include/caffe/neuron_layers.hpp  |  4
-rw-r--r--  include/caffe/python_layer.hpp   | 13
-rw-r--r--  include/caffe/solver.hpp         | 14
-rw-r--r--  include/caffe/vision_layers.hpp  | 66
11 files changed, 188 insertions, 30 deletions
diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp
index 6cf80a37..5f86bc26 100644
--- a/include/caffe/common.hpp
+++ b/include/caffe/common.hpp
@@ -19,7 +19,7 @@
 #include "caffe/util/device_alternate.hpp"
 
 // gflags 2.1 issue: namespace google was changed to gflags without warning.
-// Luckily we will be able to use GFLAGS_GFAGS_H_ to detect if it is version
+// Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version
 // 2.1. If yes, we will add a temporary solution to redirect the namespace.
 // TODO(Yangqing): Once gflags solves the problem in a more elegant way, let's
 // remove the following hack.
diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp
index cae1c3e4..e6b42c14 100644
--- a/include/caffe/common_layers.hpp
+++ b/include/caffe/common_layers.hpp
@@ -295,6 +295,45 @@ class MVNLayer : public Layer<Dtype> {
 
   /// sum_multiplier is used to carry out sum using BLAS
   Blob<Dtype> sum_multiplier_;
+  Dtype eps_;
+};
+
+/*
+ * @brief Reshapes the input Blob into an arbitrary-sized output Blob.
+ *
+ * Note: similarly to FlattenLayer, this layer does not change the input values
+ * (see FlattenLayer, Blob::ShareData and Blob::ShareDiff).
+ */
+template <typename Dtype>
+class ReshapeLayer : public Layer<Dtype> {
+ public:
+  explicit ReshapeLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline const char* type() const { return "Reshape"; }
+  virtual inline int ExactNumBottomBlobs() const { return 1; }
+  virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {}
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {}
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
+
+  /// @brief vector of axes indices whose dimensions we'll copy from the bottom
+  vector<int> copy_axes_;
+  /// @brief the index of the axis whose dimension we infer, or -1 if none
+  int inferred_axis_;
+  /// @brief the product of the "constant" output dimensions
+  int constant_count_;
 };
 
 /**
diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp
index 2bb9d948..3958cb7e 100644
--- a/include/caffe/data_layers.hpp
+++ b/include/caffe/data_layers.hpp
@@ -14,7 +14,6 @@
 #include "caffe/filler.hpp"
 #include "caffe/internal_thread.hpp"
 #include "caffe/layer.hpp"
-#include "caffe/net.hpp"
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/util/db.hpp"
 
@@ -29,7 +28,6 @@ template <typename Dtype>
 class BaseDataLayer : public Layer<Dtype> {
  public:
   explicit BaseDataLayer(const LayerParameter& param);
-  virtual ~BaseDataLayer() {}
   // LayerSetUp: implements common data layer setup functionality, and calls
   // DataLayerSetUp to do special data layer setup for individual layer types.
   // This method may not be overridden except by the BasePrefetchingDataLayer.
@@ -58,7 +56,6 @@ class BasePrefetchingDataLayer :
  public:
   explicit BasePrefetchingDataLayer(const LayerParameter& param)
       : BaseDataLayer<Dtype>(param) {}
-  virtual ~BasePrefetchingDataLayer() {}
   // LayerSetUp: implements common data layer setup functionality, and calls
   // DataLayerSetUp to do special data layer setup for individual layer types.
   // This method may not be overridden.
diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp
index bb18e8e1..ff3542e1 100644
--- a/include/caffe/filler.hpp
+++ b/include/caffe/filler.hpp
@@ -126,17 +126,18 @@ class PositiveUnitballFiller : public Filler<Dtype> {
 };
 
 /**
- * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$
- *        is set inversely proportional to the number of incoming nodes.
+ * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is
+ *        set inversely proportional to number of incoming nodes, outgoing
+ *        nodes, or their average.
  *
  * A Filler based on the paper [Bengio and Glorot 2010]: Understanding
- * the difficulty of training deep feedforward neuralnetworks, but does not
- * use the fan_out value.
+ * the difficulty of training deep feedforward neuralnetworks.
  *
- * It fills the incoming matrix by randomly sampling uniform data from
- * [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
- * of input nodes. You should make sure the input blob has shape (num, a, b, c)
- * where a * b * c = fan_in.
+ * It fills the incoming matrix by randomly sampling uniform data from [-scale,
+ * scale] where scale = sqrt(3 / n) where n is the fan_in, fan_out, or their
+ * average, depending on the variance_norm option. You should make sure the
+ * input blob has shape (num, a, b, c) where a * b * c = fan_in and num * b * c
+ * = fan_out. Note that this is currently not the case for inner product layers.
  *
  * TODO(dox): make notation in above comment consistent with rest & use LaTeX.
  */
@@ -148,7 +149,16 @@ class XavierFiller : public Filler<Dtype> {
   virtual void Fill(Blob<Dtype>* blob) {
     CHECK(blob->count());
     int fan_in = blob->count() / blob->num();
-    Dtype scale = sqrt(Dtype(3) / fan_in);
+    int fan_out = blob->count() / blob->channels();
+    Dtype n = fan_in;  // default to fan_in
+    if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_AVERAGE) {
+      n = (fan_in + fan_out) / Dtype(2);
+    } else if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_FAN_OUT) {
+      n = fan_out;
+    }
+    Dtype scale = sqrt(Dtype(3) / n);
     caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
         blob->mutable_cpu_data());
     CHECK_EQ(this->filler_param_.sparse(), -1)
@@ -156,6 +166,47 @@
   }
 };
+/**
+ * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where
+ *        @f$ \sigma^2 @f$ is set inversely proportional to number of incoming
+ *        nodes, outgoing nodes, or their average.
+ *
+ * A Filler based on the paper [He, Zhang, Ren and Sun 2015]: Specifically
+ * accounts for ReLU nonlinearities.
+ *
+ * Aside: for another perspective on the scaling factor, see the derivation of
+ * [Saxe, McClelland, and Ganguli 2013 (v3)].
+ *
+ * It fills the incoming matrix by randomly sampling Gaussian data with std =
+ * sqrt(2 / n) where n is the fan_in, fan_out, or their average, depending on
+ * the variance_norm option. You should make sure the input blob has shape (num,
+ * a, b, c) where a * b * c = fan_in and num * b * c = fan_out. Note that this
+ * is currently not the case for inner product layers.
+ */
+template <typename Dtype>
+class MSRAFiller : public Filler<Dtype> {
+ public:
+  explicit MSRAFiller(const FillerParameter& param)
+      : Filler<Dtype>(param) {}
+  virtual void Fill(Blob<Dtype>* blob) {
+    CHECK(blob->count());
+    int fan_in = blob->count() / blob->num();
+    int fan_out = blob->count() / blob->channels();
+    Dtype n = fan_in;  // default to fan_in
+    if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_AVERAGE) {
+      n = (fan_in + fan_out) / Dtype(2);
+    } else if (this->filler_param_.variance_norm() ==
+        FillerParameter_VarianceNorm_FAN_OUT) {
+      n = fan_out;
+    }
+    Dtype std = sqrt(Dtype(2) / n);
+    caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
+        blob->mutable_cpu_data());
+    CHECK_EQ(this->filler_param_.sparse(), -1)
+        << "Sparsity not supported by this Filler.";
+  }
+};
 
 /**
  * @brief Get a specific filler from the specification given in FillerParameter.
@@ -176,6 +227,8 @@ Filler<Dtype>* GetFiller(const FillerParameter& param) {
     return new UniformFiller<Dtype>(param);
   } else if (type == "xavier") {
     return new XavierFiller<Dtype>(param);
+  } else if (type == "msra") {
+    return new MSRAFiller<Dtype>(param);
   } else {
     CHECK(false) << "Unknown filler name: " << param.type();
   }
diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp
index 2d13ef97..8f924a75 100644
--- a/include/caffe/layer.hpp
+++ b/include/caffe/layer.hpp
@@ -406,6 +406,7 @@ template <typename Dtype>
 inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   Dtype loss = 0;
+  Reshape(bottom, top);
   switch (Caffe::mode()) {
   case Caffe::CPU:
     Forward_cpu(bottom, top);
diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp
index d3eecd2e..86c34241 100644
--- a/include/caffe/loss_layers.hpp
+++ b/include/caffe/loss_layers.hpp
@@ -605,8 +605,6 @@ class SigmoidCrossEntropyLossLayer : public LossLayer<Dtype> {
   /// @copydoc SigmoidCrossEntropyLossLayer
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
-  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
-      const vector<Blob<Dtype>*>& top);
 
   /**
    * @brief Computes the sigmoid cross-entropy loss error gradient w.r.t. the
diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index 075afebc..5665df1e 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -137,6 +137,9 @@ class Net {
   inline const vector<Dtype>& blob_loss_weights() const {
     return blob_loss_weights_;
   }
+  inline const vector<bool>& layer_need_backward() const {
+    return layer_need_backward_;
+  }
   /// @brief returns the parameters
   inline const vector<shared_ptr<Blob<Dtype> > >& params() const {
     return params_;
diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp
index 32321513..9cf233f0 100644
--- a/include/caffe/neuron_layers.hpp
+++ b/include/caffe/neuron_layers.hpp
@@ -8,7 +8,6 @@
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/layer.hpp"
-#include "caffe/net.hpp"
 #include "caffe/proto/caffe.pb.h"
 
 #define HDF5_DATA_DATASET_NAME "data"
@@ -734,7 +733,8 @@ class PReLULayer : public NeuronLayer<Dtype> {
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
 
   bool channel_shared_;
-  Blob<Dtype> multiplier_;  // dot multipler for backward computation of params
+  Blob<Dtype> multiplier_;  // dot multiplier for backward computation of params
+  Blob<Dtype> backward_buff_;  // temporary buffer for backward computation
   Blob<Dtype> bottom_memory_;  // memory for in-place computation
 };
 
diff --git a/include/caffe/python_layer.hpp b/include/caffe/python_layer.hpp
index 816ef453..19cf18c9 100644
--- a/include/caffe/python_layer.hpp
+++ b/include/caffe/python_layer.hpp
@@ -14,12 +14,12 @@ template <typename Dtype>
 class PythonLayer : public Layer<Dtype> {
  public:
   PythonLayer(PyObject* self, const LayerParameter& param)
-      : Layer<Dtype>(param), self_(self) { }
+      : Layer<Dtype>(param), self_(bp::handle<>(bp::borrowed(self))) { }
 
   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) {
     try {
-      bp::call_method<bp::object>(self_, "setup", bottom, top);
+      self_.attr("setup")(bottom, top);
     } catch (bp::error_already_set) {
       PyErr_Print();
       throw;
@@ -29,7 +29,7 @@ class PythonLayer : public Layer<Dtype> {
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) {
     try {
-      bp::call_method<bp::object>(self_, "reshape", bottom, top);
+      self_.attr("reshape")(bottom, top);
     } catch (bp::error_already_set) {
       PyErr_Print();
       throw;
@@ -42,7 +42,7 @@ class PythonLayer : public Layer<Dtype> {
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) {
     try {
-      bp::call_method<bp::object>(self_, "forward", bottom, top);
+      self_.attr("forward")(bottom, top);
     } catch (bp::error_already_set) {
       PyErr_Print();
       throw;
@@ -51,8 +51,7 @@ class PythonLayer : public Layer<Dtype> {
   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
     try {
-      bp::call_method<bp::object>(self_, "backward", top, propagate_down,
-          bottom);
+      self_.attr("backward")(top, propagate_down, bottom);
     } catch (bp::error_already_set) {
       PyErr_Print();
       throw;
@@ -60,7 +59,7 @@ class PythonLayer : public Layer<Dtype> {
   }
 
  private:
-  PyObject* self_;
+  bp::object self_;
 };
 
 } // namespace caffe
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 4dcdc3dc..da1bab13 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -11,7 +11,7 @@ namespace caffe {
 /**
  * @brief An interface for classes that perform optimization on Net%s.
  *
- * Requires implementation of ComputeUpdateValue to compute a parameter update
+ * Requires implementation of ApplyUpdate to compute a parameter update
 * given the current state of the Net parameters.
 */
 template <typename Dtype>
@@ -39,8 +39,8 @@ class Solver {
   int iter() { return iter_; }
 
  protected:
-  // Get the update value for the current iteration.
-  virtual void ComputeUpdateValue() = 0;
+  // Make and apply the update value for the current iteration.
+  virtual void ApplyUpdate() = 0;
   // The Solver::Snapshot function implements the basic snapshotting utility
   // that stores the learned net. You should implement the SnapshotSolverState()
   // function that produces a SolverState protocol buffer that needs to be
@@ -80,7 +80,9 @@ class SGDSolver : public Solver<Dtype> {
 protected:
   void PreSolve();
   Dtype GetLearningRate();
-  virtual void ComputeUpdateValue();
+  virtual void ApplyUpdate();
+  virtual void Regularize(int param_id);
+  virtual void ComputeUpdateValue(int param_id, Dtype rate);
   virtual void ClipGradients();
   virtual void SnapshotSolverState(SolverState * state);
   virtual void RestoreSolverState(const SolverState& state);
@@ -102,7 +104,7 @@ class NesterovSolver : public SGDSolver<Dtype> {
       : SGDSolver<Dtype>(param_file) {}
 
  protected:
-  virtual void ComputeUpdateValue();
+  virtual void ComputeUpdateValue(int param_id, Dtype rate);
 
   DISABLE_COPY_AND_ASSIGN(NesterovSolver);
 };
@@ -116,7 +118,7 @@ class AdaGradSolver : public SGDSolver<Dtype> {
       : SGDSolver<Dtype>(param_file) { constructor_sanity_check(); }
 
  protected:
-  virtual void ComputeUpdateValue();
+  virtual void ComputeUpdateValue(int param_id, Dtype rate);
   void constructor_sanity_check() {
     CHECK_EQ(0, this->param_.momentum())
         << "Momentum cannot be used with AdaGrad.";
diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index cd0ab8ba..a6bd86a9 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -453,6 +453,72 @@ class CuDNNPoolingLayer : public PoolingLayer<Dtype> {
 };
 #endif
 
+/**
+ * @brief Does spatial pyramid pooling on the input image
+ *        by taking the max, average, etc. within regions
+ *        so that the result vector of different sized
+ *        images are of the same size.
+ */
+template <typename Dtype>
+class SPPLayer : public Layer<Dtype> {
+ public:
+  explicit SPPLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline const char* type() const { return "SPP"; }
+  virtual inline int ExactNumBottomBlobs() const { return 1; }
+  virtual inline int MinTopBlobs() const { return 1; }
+  // MAX POOL layers can output an extra top blob for the mask;
+  // others can only output the pooled inputs.
+  virtual inline int MaxTopBlobs() const {
+    return (this->layer_param_.pooling_param().pool() ==
+            PoolingParameter_PoolMethod_MAX) ? 2 : 1;
+  }
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  // calculates the kernel and stride dimensions for the pooling layer,
+  // returns a correctly configured LayerParameter for a PoolingLayer
+  virtual LayerParameter GetPoolingParam(const int pyramid_level,
+      const int bottom_h, const int bottom_w, const SPPParameter spp_param);
+
+  int pyramid_height_;
+  int bottom_h_, bottom_w_;
+  int channels_;
+  int kernel_h_, kernel_w_;
+  int pad_h_, pad_w_;
+
+  /// the internal Split layer that feeds the pooling layers
+  shared_ptr<SplitLayer<Dtype> > split_layer_;
+  /// top vector holder used in call to the underlying SplitLayer::Forward
+  vector<Blob<Dtype>*> split_top_vec_;
+  /// bottom vector holder used in call to the underlying PoolingLayer::Forward
+  vector<vector<Blob<Dtype>*>*> pooling_bottom_vecs_;
+  /// the internal Pooling layers of different kernel sizes
+  vector<shared_ptr<PoolingLayer<Dtype> > > pooling_layers_;
+  /// top vector holders used in call to the underlying PoolingLayer::Forward
+  vector<vector<Blob<Dtype>*>*> pooling_top_vecs_;
+  /// pooling_outputs stores the outputs of the PoolingLayers
+  vector<Blob<Dtype>*> pooling_outputs_;
+  /// the internal Flatten layers that the Pooling layers feed into
+  vector<FlattenLayer<Dtype>*> flatten_layers_;
+  /// top vector holders used in call to the underlying FlattenLayer::Forward
+  vector<vector<Blob<Dtype>*>*> flatten_top_vecs_;
+  /// flatten_outputs stores the outputs of the FlattenLayers
+  vector<Blob<Dtype>*> flatten_outputs_;
+  /// bottom vector holder used in call to the underlying ConcatLayer::Forward
+  vector<Blob<Dtype>*> concat_bottom_vec_;
+  /// the internal Concat layers that the Flatten layers feed into
+  shared_ptr<ConcatLayer<Dtype> > concat_layer_;
+};
+
 } // namespace caffe
 
 #endif // CAFFE_VISION_LAYERS_HPP_
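Appendix (not part of the patch): a minimal usage sketch for the MSRA filler introduced in filler.hpp above. It assumes the updated headers are on the include path and that FillerParameter in caffe.proto carries the variance_norm enum referenced by the new code (with FAN_IN as the implied default); the blob shape and the commented-out setter below are illustrative assumptions, not something this diff defines.

// Sketch only: fills a convolution-style weight Blob with the "msra" filler
// added by this patch, via the GetFiller() factory shown above.
#include "caffe/blob.hpp"
#include "caffe/filler.hpp"

int main() {
  caffe::FillerParameter param;
  param.set_type("msra");  // dispatched to MSRAFiller by GetFiller()
  // Optional, assuming the generated proto setter exists:
  // param.set_variance_norm(caffe::FillerParameter_VarianceNorm_FAN_OUT);

  // A 3x3 conv weight blob: fan_in = 32*3*3, fan_out = 64*3*3.
  caffe::Blob<float> weights(64, 32, 3, 3);

  boost::shared_ptr<caffe::Filler<float> > filler(
      caffe::GetFiller<float>(param));
  filler->Fill(&weights);  // Gaussian with std = sqrt(2 / fan_in) by default
  return 0;
}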