Diffstat (limited to 'include')
-rw-r--r--  include/caffe/common.hpp        |  2
-rw-r--r--  include/caffe/common_layers.hpp | 39
-rw-r--r--  include/caffe/data_layers.hpp   |  3
-rw-r--r--  include/caffe/filler.hpp        | 71
-rw-r--r--  include/caffe/layer.hpp         |  1
-rw-r--r--  include/caffe/loss_layers.hpp   |  2
-rw-r--r--  include/caffe/net.hpp           |  3
-rw-r--r--  include/caffe/neuron_layers.hpp |  4
-rw-r--r--  include/caffe/python_layer.hpp  | 13
-rw-r--r--  include/caffe/solver.hpp        | 14
-rw-r--r--  include/caffe/vision_layers.hpp | 66
11 files changed, 188 insertions(+), 30 deletions(-)
diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp
index 6cf80a37..5f86bc26 100644
--- a/include/caffe/common.hpp
+++ b/include/caffe/common.hpp
@@ -19,7 +19,7 @@
#include "caffe/util/device_alternate.hpp"
// gflags 2.1 issue: namespace google was changed to gflags without warning.
-// Luckily we will be able to use GFLAGS_GFAGS_H_ to detect if it is version
+// Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version
// 2.1. If yes, we will add a temporary solution to redirect the namespace.
// TODO(Yangqing): Once gflags solves the problem in a more elegant way, let's
// remove the following hack.
diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp
index cae1c3e4..e6b42c14 100644
--- a/include/caffe/common_layers.hpp
+++ b/include/caffe/common_layers.hpp
@@ -295,6 +295,45 @@ class MVNLayer : public Layer<Dtype> {
/// sum_multiplier is used to carry out sum using BLAS
Blob<Dtype> sum_multiplier_;
+ Dtype eps_;
+};
+
+/*
+ * @brief Reshapes the input Blob into an arbitrary-sized output Blob.
+ *
+ * Note: similarly to FlattenLayer, this layer does not change the input values
+ * (see FlattenLayer, Blob::ShareData and Blob::ShareDiff).
+ */
+template <typename Dtype>
+class ReshapeLayer : public Layer<Dtype> {
+ public:
+ explicit ReshapeLayer(const LayerParameter& param)
+ : Layer<Dtype>(param) {}
+ virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+
+ virtual inline const char* type() const { return "Reshape"; }
+ virtual inline int ExactNumBottomBlobs() const { return 1; }
+ virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ protected:
+ virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {}
+ virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
+ virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {}
+ virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
+
+ /// @brief vector of axes indices whose dimensions we'll copy from the bottom
+ vector<int> copy_axes_;
+ /// @brief the index of the axis whose dimension we infer, or -1 if none
+ int inferred_axis_;
+ /// @brief the product of the "constant" output dimensions
+ int constant_count_;
};
/**
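The new ReshapeLayer only changes shape metadata: the top blob shares the bottom blob's data (as the FlattenLayer note says), so Forward/Backward are empty. The members copy_axes_, inferred_axis_, and constant_count_ suggest a spec where a requested dimension of 0 copies the corresponding bottom dimension and -1 is inferred from the remaining count. Below is a minimal standalone sketch of that shape-inference idea, assuming a plain std::vector<int> spec rather than Caffe's ReshapeParameter; it illustrates the logic, it is not the layer's actual implementation.

// Sketch: infer an output shape from a requested shape spec, where a
// requested dim of 0 means "copy the bottom dim" and -1 means "infer it
// so the total element count is preserved". Hypothetical helper, not Caffe.
#include <cassert>
#include <vector>

std::vector<int> InferReshape(const std::vector<int>& bottom_shape,
                              const std::vector<int>& spec) {
  std::vector<int> top_shape(spec.size());
  int inferred_axis = -1;
  int constant_count = 1;  // product of all explicitly known output dims
  for (size_t i = 0; i < spec.size(); ++i) {
    if (spec[i] == 0) {                  // copy this axis from the bottom
      top_shape[i] = bottom_shape[i];
      constant_count *= top_shape[i];
    } else if (spec[i] == -1) {          // at most one axis may be inferred
      assert(inferred_axis == -1);
      inferred_axis = static_cast<int>(i);
    } else {                             // a constant output dimension
      top_shape[i] = spec[i];
      constant_count *= top_shape[i];
    }
  }
  int bottom_count = 1;
  for (size_t i = 0; i < bottom_shape.size(); ++i) {
    bottom_count *= bottom_shape[i];
  }
  if (inferred_axis >= 0) {
    assert(bottom_count % constant_count == 0);
    top_shape[inferred_axis] = bottom_count / constant_count;
  }
  return top_shape;  // e.g. bottom {2, 3, 4} with spec {0, -1} -> {2, 12}
}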
diff --git a/include/caffe/data_layers.hpp b/include/caffe/data_layers.hpp
index 2bb9d948..3958cb7e 100644
--- a/include/caffe/data_layers.hpp
+++ b/include/caffe/data_layers.hpp
@@ -14,7 +14,6 @@
#include "caffe/filler.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
-#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
@@ -29,7 +28,6 @@ template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
public:
explicit BaseDataLayer(const LayerParameter& param);
- virtual ~BaseDataLayer() {}
// LayerSetUp: implements common data layer setup functionality, and calls
// DataLayerSetUp to do special data layer setup for individual layer types.
// This method may not be overridden except by the BasePrefetchingDataLayer.
@@ -58,7 +56,6 @@ class BasePrefetchingDataLayer :
public:
explicit BasePrefetchingDataLayer(const LayerParameter& param)
: BaseDataLayer<Dtype>(param) {}
- virtual ~BasePrefetchingDataLayer() {}
// LayerSetUp: implements common data layer setup functionality, and calls
// DataLayerSetUp to do special data layer setup for individual layer types.
// This method may not be overridden.
diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp
index bb18e8e1..ff3542e1 100644
--- a/include/caffe/filler.hpp
+++ b/include/caffe/filler.hpp
@@ -126,17 +126,18 @@ class PositiveUnitballFiller : public Filler<Dtype> {
};
/**
- * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$
- * is set inversely proportional to the number of incoming nodes.
+ * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is
+ * set inversely proportional to number of incoming nodes, outgoing
+ * nodes, or their average.
*
* A Filler based on the paper [Bengio and Glorot 2010]: Understanding
- * the difficulty of training deep feedforward neuralnetworks, but does not
- * use the fan_out value.
+ * the difficulty of training deep feedforward neural networks.
*
- * It fills the incoming matrix by randomly sampling uniform data from
- * [-scale, scale] where scale = sqrt(3 / fan_in) where fan_in is the number
- * of input nodes. You should make sure the input blob has shape (num, a, b, c)
- * where a * b * c = fan_in.
+ * It fills the incoming matrix by randomly sampling uniform data from [-scale,
+ * scale] where scale = sqrt(3 / n) where n is the fan_in, fan_out, or their
+ * average, depending on the variance_norm option. You should make sure the
+ * input blob has shape (num, a, b, c) where a * b * c = fan_in and num * b * c
+ * = fan_out. Note that this is currently not the case for inner product layers.
*
* TODO(dox): make notation in above comment consistent with rest & use LaTeX.
*/
@@ -148,7 +149,16 @@ class XavierFiller : public Filler<Dtype> {
virtual void Fill(Blob<Dtype>* blob) {
CHECK(blob->count());
int fan_in = blob->count() / blob->num();
- Dtype scale = sqrt(Dtype(3) / fan_in);
+ int fan_out = blob->count() / blob->channels();
+ Dtype n = fan_in; // default to fan_in
+ if (this->filler_param_.variance_norm() ==
+ FillerParameter_VarianceNorm_AVERAGE) {
+ n = (fan_in + fan_out) / Dtype(2);
+ } else if (this->filler_param_.variance_norm() ==
+ FillerParameter_VarianceNorm_FAN_OUT) {
+ n = fan_out;
+ }
+ Dtype scale = sqrt(Dtype(3) / n);
caffe_rng_uniform<Dtype>(blob->count(), -scale, scale,
blob->mutable_cpu_data());
CHECK_EQ(this->filler_param_.sparse(), -1)
@@ -156,6 +166,47 @@ class XavierFiller : public Filler<Dtype> {
}
};
+/**
+ * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where
+ * @f$ \sigma^2 @f$ is set inversely proportional to number of incoming
+ * nodes, outgoing nodes, or their average.
+ *
+ * A Filler based on the paper [He, Zhang, Ren and Sun 2015]: Specifically
+ * accounts for ReLU nonlinearities.
+ *
+ * Aside: for another perspective on the scaling factor, see the derivation of
+ * [Saxe, McClelland, and Ganguli 2013 (v3)].
+ *
+ * It fills the incoming matrix by randomly sampling Gaussian data with std =
+ * sqrt(2 / n) where n is the fan_in, fan_out, or their average, depending on
+ * the variance_norm option. You should make sure the input blob has shape (num,
+ * a, b, c) where a * b * c = fan_in and num * b * c = fan_out. Note that this
+ * is currently not the case for inner product layers.
+ */
+template <typename Dtype>
+class MSRAFiller : public Filler<Dtype> {
+ public:
+ explicit MSRAFiller(const FillerParameter& param)
+ : Filler<Dtype>(param) {}
+ virtual void Fill(Blob<Dtype>* blob) {
+ CHECK(blob->count());
+ int fan_in = blob->count() / blob->num();
+ int fan_out = blob->count() / blob->channels();
+ Dtype n = fan_in; // default to fan_in
+ if (this->filler_param_.variance_norm() ==
+ FillerParameter_VarianceNorm_AVERAGE) {
+ n = (fan_in + fan_out) / Dtype(2);
+ } else if (this->filler_param_.variance_norm() ==
+ FillerParameter_VarianceNorm_FAN_OUT) {
+ n = fan_out;
+ }
+ Dtype std = sqrt(Dtype(2) / n);
+ caffe_rng_gaussian<Dtype>(blob->count(), Dtype(0), std,
+ blob->mutable_cpu_data());
+ CHECK_EQ(this->filler_param_.sparse(), -1)
+ << "Sparsity not supported by this Filler.";
+ }
+};
/**
* @brief Get a specific filler from the specification given in FillerParameter.
@@ -176,6 +227,8 @@ Filler<Dtype>* GetFiller(const FillerParameter& param) {
return new UniformFiller<Dtype>(param);
} else if (type == "xavier") {
return new XavierFiller<Dtype>(param);
+ } else if (type == "msra") {
+ return new MSRAFiller<Dtype>(param);
} else {
CHECK(false) << "Unknown filler name: " << param.type();
}
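As a concrete check of the fan computations above: for a 4-D weight blob of shape (num, channels, height, width), blob->count() / blob->num() gives fan_in = channels * height * width and blob->count() / blob->channels() gives fan_out = num * height * width. The following small standalone sketch (not Caffe code) reproduces the scale/std selection for the three variance_norm modes.

// Sketch of the Xavier / MSRA scaling rules described above, assuming a
// 4-D weight blob (num, channels, height, width). Not Caffe code.
#include <cmath>
#include <cstdio>

enum VarianceNorm { FAN_IN, FAN_OUT, AVERAGE };

void FillerScales(int num, int channels, int height, int width,
                  VarianceNorm norm) {
  const int count = num * channels * height * width;
  const int fan_in = count / num;        // channels * height * width
  const int fan_out = count / channels;  // num * height * width
  double n = fan_in;                     // default, matching the code above
  if (norm == AVERAGE) {
    n = (fan_in + fan_out) / 2.0;
  } else if (norm == FAN_OUT) {
    n = fan_out;
  }
  const double xavier_scale = std::sqrt(3.0 / n);  // uniform in [-scale, scale]
  const double msra_std = std::sqrt(2.0 / n);      // Gaussian standard deviation
  std::printf("fan_in=%d fan_out=%d xavier_scale=%.4f msra_std=%.4f\n",
              fan_in, fan_out, xavier_scale, msra_std);
}

// Example: a 3x3 convolution with 64 input and 128 output channels:
// FillerScales(128, 64, 3, 3, FAN_IN)
//   -> fan_in=576, fan_out=1152, xavier_scale~=0.0722, msra_std~=0.0589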
diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp
index 2d13ef97..8f924a75 100644
--- a/include/caffe/layer.hpp
+++ b/include/caffe/layer.hpp
@@ -406,6 +406,7 @@ template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
Dtype loss = 0;
+ Reshape(bottom, top);
switch (Caffe::mode()) {
case Caffe::CPU:
Forward_cpu(bottom, top);
diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp
index d3eecd2e..86c34241 100644
--- a/include/caffe/loss_layers.hpp
+++ b/include/caffe/loss_layers.hpp
@@ -605,8 +605,6 @@ class SigmoidCrossEntropyLossLayer : public LossLayer<Dtype> {
/// @copydoc SigmoidCrossEntropyLossLayer
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
- virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
/**
* @brief Computes the sigmoid cross-entropy loss error gradient w.r.t. the
diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index 075afebc..5665df1e 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -137,6 +137,9 @@ class Net {
inline const vector<Dtype>& blob_loss_weights() const {
return blob_loss_weights_;
}
+ inline const vector<bool>& layer_need_backward() const {
+ return layer_need_backward_;
+ }
/// @brief returns the parameters
inline const vector<shared_ptr<Blob<Dtype> > >& params() const {
return params_;
diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp
index 32321513..9cf233f0 100644
--- a/include/caffe/neuron_layers.hpp
+++ b/include/caffe/neuron_layers.hpp
@@ -8,7 +8,6 @@
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
-#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#define HDF5_DATA_DATASET_NAME "data"
@@ -734,7 +733,8 @@ class PReLULayer : public NeuronLayer<Dtype> {
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
bool channel_shared_;
- Blob<Dtype> multiplier_; // dot multipler for backward computation of params
+ Blob<Dtype> multiplier_; // dot multiplier for backward computation of params
+ Blob<Dtype> backward_buff_; // temporary buffer for backward computation
Blob<Dtype> bottom_memory_; // memory for in-place computation
};
diff --git a/include/caffe/python_layer.hpp b/include/caffe/python_layer.hpp
index 816ef453..19cf18c9 100644
--- a/include/caffe/python_layer.hpp
+++ b/include/caffe/python_layer.hpp
@@ -14,12 +14,12 @@ template <typename Dtype>
class PythonLayer : public Layer<Dtype> {
public:
PythonLayer(PyObject* self, const LayerParameter& param)
- : Layer<Dtype>(param), self_(self) { }
+ : Layer<Dtype>(param), self_(bp::handle<>(bp::borrowed(self))) { }
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
try {
- bp::call_method<bp::object>(self_, "setup", bottom, top);
+ self_.attr("setup")(bottom, top);
} catch (bp::error_already_set) {
PyErr_Print();
throw;
@@ -29,7 +29,7 @@ class PythonLayer : public Layer<Dtype> {
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
try {
- bp::call_method<bp::object>(self_, "reshape", bottom, top);
+ self_.attr("reshape")(bottom, top);
} catch (bp::error_already_set) {
PyErr_Print();
throw;
@@ -42,7 +42,7 @@ class PythonLayer : public Layer<Dtype> {
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
try {
- bp::call_method<bp::object>(self_, "forward", bottom, top);
+ self_.attr("forward")(bottom, top);
} catch (bp::error_already_set) {
PyErr_Print();
throw;
@@ -51,8 +51,7 @@ class PythonLayer : public Layer<Dtype> {
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
try {
- bp::call_method<bp::object>(self_, "backward", top, propagate_down,
- bottom);
+ self_.attr("backward")(top, propagate_down, bottom);
} catch (bp::error_already_set) {
PyErr_Print();
throw;
@@ -60,7 +59,7 @@ class PythonLayer : public Layer<Dtype> {
}
private:
- PyObject* self_;
+ bp::object self_;
};
} // namespace caffe
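The switch from a raw PyObject* plus bp::call_method to a bp::object built from bp::handle<>(bp::borrowed(self)) means the wrapper now holds a properly reference-counted handle on the Python instance and invokes its methods through attr(). A minimal Boost.Python sketch of the same ownership pattern, outside of Caffe:

// Sketch: wrap a borrowed PyObject* in a bp::object (which increments the
// refcount and releases it automatically), then call a method via attr().
#include <boost/python.hpp>
namespace bp = boost::python;

void call_setup(PyObject* self_raw) {
  // bp::borrowed() marks the pointer as a borrowed reference, so the
  // handle increments the refcount instead of stealing ownership.
  bp::object self(bp::handle<>(bp::borrowed(self_raw)));
  try {
    self.attr("setup")();  // equivalent to self.setup() in Python
  } catch (bp::error_already_set&) {
    PyErr_Print();
    throw;
  }
}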
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 4dcdc3dc..da1bab13 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -11,7 +11,7 @@ namespace caffe {
/**
* @brief An interface for classes that perform optimization on Net%s.
*
- * Requires implementation of ComputeUpdateValue to compute a parameter update
+ * Requires implementation of ApplyUpdate to compute a parameter update
* given the current state of the Net parameters.
*/
template <typename Dtype>
@@ -39,8 +39,8 @@ class Solver {
int iter() { return iter_; }
protected:
- // Get the update value for the current iteration.
- virtual void ComputeUpdateValue() = 0;
+ // Make and apply the update value for the current iteration.
+ virtual void ApplyUpdate() = 0;
// The Solver::Snapshot function implements the basic snapshotting utility
// that stores the learned net. You should implement the SnapshotSolverState()
// function that produces a SolverState protocol buffer that needs to be
@@ -80,7 +80,9 @@ class SGDSolver : public Solver<Dtype> {
protected:
void PreSolve();
Dtype GetLearningRate();
- virtual void ComputeUpdateValue();
+ virtual void ApplyUpdate();
+ virtual void Regularize(int param_id);
+ virtual void ComputeUpdateValue(int param_id, Dtype rate);
virtual void ClipGradients();
virtual void SnapshotSolverState(SolverState * state);
virtual void RestoreSolverState(const SolverState& state);
@@ -102,7 +104,7 @@ class NesterovSolver : public SGDSolver<Dtype> {
: SGDSolver<Dtype>(param_file) {}
protected:
- virtual void ComputeUpdateValue();
+ virtual void ComputeUpdateValue(int param_id, Dtype rate);
DISABLE_COPY_AND_ASSIGN(NesterovSolver);
};
@@ -116,7 +118,7 @@ class AdaGradSolver : public SGDSolver<Dtype> {
: SGDSolver<Dtype>(param_file) { constructor_sanity_check(); }
protected:
- virtual void ComputeUpdateValue();
+ virtual void ComputeUpdateValue(int param_id, Dtype rate);
void constructor_sanity_check() {
CHECK_EQ(0, this->param_.momentum())
<< "Momentum cannot be used with AdaGrad.";
diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index cd0ab8ba..a6bd86a9 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -453,6 +453,72 @@ class CuDNNPoolingLayer : public PoolingLayer<Dtype> {
};
#endif
+/**
+ * @brief Does spatial pyramid pooling on the input image
+ * by taking the max, average, etc. within regions
+ * so that the result vectors of differently sized
+ * images are of the same size.
+ */
+template <typename Dtype>
+class SPPLayer : public Layer<Dtype> {
+ public:
+ explicit SPPLayer(const LayerParameter& param)
+ : Layer<Dtype>(param) {}
+ virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+
+ virtual inline const char* type() const { return "SPP"; }
+ virtual inline int ExactNumBottomBlobs() const { return 1; }
+ virtual inline int MinTopBlobs() const { return 1; }
+ // MAX POOL layers can output an extra top blob for the mask;
+ // others can only output the pooled inputs.
+ virtual inline int MaxTopBlobs() const {
+ return (this->layer_param_.pooling_param().pool() ==
+ PoolingParameter_PoolMethod_MAX) ? 2 : 1;
+ }
+
+ protected:
+ virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+ // calculates the kernel and stride dimensions for the pooling layer,
+ // returns a correctly configured LayerParameter for a PoolingLayer
+ virtual LayerParameter GetPoolingParam(const int pyramid_level,
+ const int bottom_h, const int bottom_w, const SPPParameter spp_param);
+
+ int pyramid_height_;
+ int bottom_h_, bottom_w_;
+ int channels_;
+ int kernel_h_, kernel_w_;
+ int pad_h_, pad_w_;
+
+ /// the internal Split layer that feeds the pooling layers
+ shared_ptr<SplitLayer<Dtype> > split_layer_;
+ /// top vector holder used in call to the underlying SplitLayer::Forward
+ vector<Blob<Dtype>*> split_top_vec_;
+ /// bottom vector holder used in call to the underlying PoolingLayer::Forward
+ vector<vector<Blob<Dtype>*>*> pooling_bottom_vecs_;
+ /// the internal Pooling layers of different kernel sizes
+ vector<shared_ptr<PoolingLayer<Dtype> > > pooling_layers_;
+ /// top vector holders used in call to the underlying PoolingLayer::Forward
+ vector<vector<Blob<Dtype>*>*> pooling_top_vecs_;
+ /// pooling_outputs stores the outputs of the PoolingLayers
+ vector<Blob<Dtype>*> pooling_outputs_;
+ /// the internal Flatten layers that the Pooling layers feed into
+ vector<FlattenLayer<Dtype>*> flatten_layers_;
+ /// top vector holders used in call to the underlying FlattenLayer::Forward
+ vector<vector<Blob<Dtype>*>*> flatten_top_vecs_;
+ /// flatten_outputs stores the outputs of the FlattenLayers
+ vector<Blob<Dtype>*> flatten_outputs_;
+ /// bottom vector holder used in call to the underlying ConcatLayer::Forward
+ vector<Blob<Dtype>*> concat_bottom_vec_;
+ /// the internal Concat layers that the Flatten layers feed into
+ shared_ptr<ConcatLayer<Dtype> > concat_layer_;
+};
+
} // namespace caffe
#endif // CAFFE_VISION_LAYERS_HPP_
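GetPoolingParam configures one internal PoolingLayer per pyramid level; the usual SPP recipe has level l divide the input into 2^l x 2^l bins, with kernel and stride chosen (and padding added) so the bins tile the whole bottom_h x bottom_w map and every image produces the same fixed-length vector. The sketch below illustrates that standard arithmetic; the exact rounding used by this layer is not visible in the header, so treat it as an assumption.

// Sketch of per-level kernel/stride/pad arithmetic for spatial pyramid
// pooling: level l pools the input into num_bins x num_bins regions.
// Rounding choices here follow the common SPP scheme, not this diff.
#include <cmath>

struct PyramidLevelParam {
  int kernel_h, kernel_w;
  int stride_h, stride_w;
  int pad_h, pad_w;
};

PyramidLevelParam PyramidLevel(int pyramid_level, int bottom_h, int bottom_w) {
  const int num_bins = 1 << pyramid_level;  // 1, 2, 4, ... bins per side
  PyramidLevelParam p;
  // ceil(bottom / num_bins): each bin covers at most this many pixels
  p.kernel_h = static_cast<int>(std::ceil(bottom_h / double(num_bins)));
  p.kernel_w = static_cast<int>(std::ceil(bottom_w / double(num_bins)));
  p.stride_h = p.kernel_h;
  p.stride_w = p.kernel_w;
  // pad so that kernel * num_bins covers the (possibly smaller) input
  p.pad_h = (p.kernel_h * num_bins - bottom_h + 1) / 2;
  p.pad_w = (p.kernel_w * num_bins - bottom_w + 1) / 2;
  return p;
}

// Example: a 16x16 feature map at pyramid_level 2 -> 4x4 bins with
// kernel 4, stride 4, pad 0, so every channel contributes a 4x4 grid
// regardless of the original image size.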