diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/caffe/proto/caffe.proto | 10 | ||||
-rw-r--r-- | src/caffe/solver.cpp | 2 |
2 files changed, 1 insertion, 11 deletions
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 45f21981..22716a18 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -112,16 +112,6 @@ message SolverParameter { // whether to snapshot diff in the results or not. Snapshotting diff will help // debugging but the final protocol buffer size will be much larger. optional bool snapshot_diff = 14 [ default = false]; - // Adagrad solver parameters - // For Adagrad, we will first run normal sgd using the sgd parameters above - // for adagrad_skip iterations, and then kick in the adagrad algorithm, with - // the learning rate being adagrad_gamma * adagrad_skip. Note that the adagrad - // algorithm will NOT use the learning rate multiplier that is specified in - // the layer parameter specifications, as it will adjust the learning rate - // of individual parameters in a data-dependent way. - // WORK IN PROGRESS: not actually implemented yet. - optional float adagrad_gamma = 15; // adagrad learning rate multiplier - optional float adagrad_skip = 16; // the steps to skip before adagrad kicks in } // A message that stores the solver snapshots diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 87c346f7..bf4bdbc6 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -116,7 +116,7 @@ Dtype SGDSolver<Dtype>::GetLearningRate() { template <typename Dtype> void SGDSolver<Dtype>::PreSolve() { - // First of all, see if we need to initialize the history + // Initialize the history vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params(); history_.clear(); for (int i = 0; i < net_params.size(); ++i) { |