#ifndef CAFFE_NET_HPP_
#define CAFFE_NET_HPP_

#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Connects Layer%s together into a directed acyclic graph (DAG)
 *        specified by a NetParameter.
 *
 * TODO(dox): more thorough description.
 */
template <typename Dtype>
class Net {
 public:
  explicit Net(const NetParameter& param, const Net* root_net = NULL);
  explicit Net(const string& param_file, Phase phase,
      const Net* root_net = NULL);
  virtual ~Net() {}

  /// @brief Initialize a network with a NetParameter.
  void Init(const NetParameter& param);

  /**
   * @brief Run Forward with the input Blob%s already fed separately.
   *
   * You can get the input blobs using input_blobs().
   */
  const vector<Blob<Dtype>*>& ForwardPrefilled(Dtype* loss = NULL);

  /**
   * The From and To variants of Forward and Backward operate on the
   * (topological) ordering by which the net is specified. For general DAG
   * networks, note that (1) computing from one layer to another might entail
   * extra computation on unrelated branches, and (2) computation starting in
   * the middle may be incorrect if all of the layers of a fan-in are not
   * included.
   */
  Dtype ForwardFromTo(int start, int end);
  Dtype ForwardFrom(int start);
  Dtype ForwardTo(int end);
  /// @brief Run forward using a set of bottom blobs, and return the result.
  const vector<Blob<Dtype>*>& Forward(const vector<Blob<Dtype>*>& bottom,
      Dtype* loss = NULL);
  /**
   * @brief Run forward using a serialized BlobProtoVector and return the
   *        result as a serialized BlobProtoVector.
   */
  string Forward(const string& input_blob_protos, Dtype* loss = NULL);

  /**
   * @brief Zeroes out the diffs of all net parameters.
   *        Should be run before Backward.
   */
  void ClearParamDiffs();

  /**
   * The network backward should take no input and output, since it solely
   * computes the gradient w.r.t. the parameters, and the data has already
   * been provided during the forward pass.
   */
  void Backward();
  void BackwardFromTo(int start, int end);
  void BackwardFrom(int start);
  void BackwardTo(int end);

  /**
   * @brief Reshape all layers from bottom to top.
   *
   * This is useful to propagate changes to layer sizes without running
   * a forward pass, e.g. to compute output feature size.
   */
  void Reshape();

  Dtype ForwardBackward(const vector<Blob<Dtype>*>& bottom) {
    Dtype loss;
    Forward(bottom, &loss);
    Backward();
    return loss;
  }

  /// @brief Updates the network weights based on the diff values computed.
  void Update();
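  // A minimal usage sketch for one training-style iteration (illustrative
  // only; `net` and `bottom` are hypothetical, and a real Solver additionally
  // scales the diffs by the learning rate before calling Update()):
  //
  //   net.ClearParamDiffs();                      // zero parameter gradients
  //   Dtype loss = net.ForwardBackward(bottom);   // forward + backward pass
  //   net.Update();                               // apply stored diffs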
  /**
   * @brief Shares weight data of owner blobs with shared blobs.
   *
   * Note: this is called by Net::Init, and thus should normally not be
   * called manually.
   */
  void ShareWeights();

  /**
   * @brief For an already initialized net, implicitly copies (i.e., using no
   *        additional memory) the pre-trained layers from another Net.
   */
  void ShareTrainedLayersWith(const Net* other);
  // For an already initialized net, CopyTrainedLayersFrom() copies the
  // already trained layers from another net parameter instance.
  /**
   * @brief For an already initialized net, copies the pre-trained layers from
   *        another Net.
   */
  void CopyTrainedLayersFrom(const NetParameter& param);
  void CopyTrainedLayersFrom(const string trained_filename);
  void CopyTrainedLayersFromBinaryProto(const string trained_filename);
  void CopyTrainedLayersFromHDF5(const string trained_filename);
  /// @brief Writes the net to a proto.
  void ToProto(NetParameter* param, bool write_diff = false) const;
  /// @brief Writes the net to an HDF5 file.
  void ToHDF5(const string& filename, bool write_diff = false) const;

  /// @brief returns the network name.
  inline const string& name() const { return name_; }
  /// @brief returns the layer names
  inline const vector<string>& layer_names() const { return layer_names_; }
  /// @brief returns the blob names
  inline const vector<string>& blob_names() const { return blob_names_; }
  /// @brief returns the blobs
  inline const vector<shared_ptr<Blob<Dtype> > >& blobs() const {
    return blobs_;
  }
  /// @brief returns the layers
  inline const vector<shared_ptr<Layer<Dtype> > >& layers() const {
    return layers_;
  }
  /// @brief returns the phase: TRAIN or TEST
  inline Phase phase() const { return phase_; }
  /**
   * @brief returns the bottom vecs for each layer -- usually you won't
   *        need this unless you do per-layer checks such as gradients.
   */
  inline const vector<vector<Blob<Dtype>*> >& bottom_vecs() const {
    return bottom_vecs_;
  }
  /**
   * @brief returns the top vecs for each layer -- usually you won't
   *        need this unless you do per-layer checks such as gradients.
   */
  inline const vector<vector<Blob<Dtype>*> >& top_vecs() const {
    return top_vecs_;
  }
  /// @brief returns the ids of the top blobs of layer i
  inline const vector<int>& top_ids(int i) const {
    CHECK_GE(i, 0) << "Invalid layer id";
    CHECK_LT(i, top_id_vecs_.size()) << "Invalid layer id";
    return top_id_vecs_[i];
  }
  /// @brief returns the ids of the bottom blobs of layer i
  inline const vector<int>& bottom_ids(int i) const {
    CHECK_GE(i, 0) << "Invalid layer id";
    CHECK_LT(i, bottom_id_vecs_.size()) << "Invalid layer id";
    return bottom_id_vecs_[i];
  }
  inline const vector<vector<bool> >& bottom_need_backward() const {
    return bottom_need_backward_;
  }
  inline const vector<Dtype>& blob_loss_weights() const {
    return blob_loss_weights_;
  }
  inline const vector<bool>& layer_need_backward() const {
    return layer_need_backward_;
  }
  /// @brief returns the parameters
  inline const vector<shared_ptr<Blob<Dtype> > >& params() const {
    return params_;
  }
  inline const vector<Blob<Dtype>*>& learnable_params() const {
    return learnable_params_;
  }
  /// @brief returns the learnable parameter learning rate multipliers
  inline const vector<float>& params_lr() const { return params_lr_; }
  inline const vector<bool>& has_params_lr() const { return has_params_lr_; }
  /// @brief returns the learnable parameter decay multipliers
  inline const vector<float>& params_weight_decay() const {
    return params_weight_decay_;
  }
  inline const vector<bool>& has_params_decay() const {
    return has_params_decay_;
  }
  const map<string, int>& param_names_index() const {
    return param_names_index_;
  }
  inline const vector<int>& param_owners() const { return param_owners_; }
  inline const vector<string>& param_display_names() const {
    return param_display_names_;
  }
  /// @brief Input and output blob numbers
  inline int num_inputs() const { return net_input_blobs_.size(); }
  inline int num_outputs() const { return net_output_blobs_.size(); }
  inline const vector<Blob<Dtype>*>& input_blobs() const {
    return net_input_blobs_;
  }
  inline const vector<Blob<Dtype>*>& output_blobs() const {
    return net_output_blobs_;
  }
  inline const vector<int>& input_blob_indices() const {
    return net_input_blob_indices_;
  }
  inline const vector<int>& output_blob_indices() const {
    return net_output_blob_indices_;
  }
  bool has_blob(const string& blob_name) const;
  const shared_ptr<Blob<Dtype> > blob_by_name(const string& blob_name) const;
  bool has_layer(const string& layer_name) const;
  const shared_ptr<Layer<Dtype> > layer_by_name(
      const string& layer_name) const;

  void set_debug_info(const bool value) { debug_info_ = value; }
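  // A hedged lookup sketch (the blob name "conv1" is an assumption; guard
  // with has_blob() before dereferencing, since looking up an unknown name
  // does not yield a usable blob):
  //
  //   if (net.has_blob("conv1")) {
  //     const shared_ptr<Blob<Dtype> > b = net.blob_by_name("conv1");
  //     const Dtype* activations = b->cpu_data();  // forward results
  //     const Dtype* gradients = b->cpu_diff();    // backward results
  //   }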
  // Helpers for Init.
  /**
   * @brief Remove layers that the user specified should be excluded given
   *        the current phase, level, and stage.
   */
  static void FilterNet(const NetParameter& param,
      NetParameter* param_filtered);
  /// @brief return whether NetState state meets NetStateRule rule
  static bool StateMeetsRule(const NetState& state, const NetStateRule& rule,
      const string& layer_name);

 protected:
  // Helpers for Init.
  /// @brief Append a new input or top blob to the net.
  void AppendTop(const NetParameter& param, const int layer_id,
                 const int top_id, set<string>* available_blobs,
                 map<string, int>* blob_name_to_idx);
  /// @brief Append a new bottom blob to the net.
  int AppendBottom(const NetParameter& param, const int layer_id,
                   const int bottom_id, set<string>* available_blobs,
                   map<string, int>* blob_name_to_idx);
  /// @brief Append a new parameter blob to the net.
  void AppendParam(const NetParameter& param, const int layer_id,
                   const int param_id);

  /// @brief Helper for displaying debug info in Forward about input Blobs.
  void InputDebugInfo(const int layer_id);
  /// @brief Helper for displaying debug info in Forward.
  void ForwardDebugInfo(const int layer_id);
  /// @brief Helper for displaying debug info in Backward.
  void BackwardDebugInfo(const int layer_id);
  /// @brief Helper for displaying debug info in Update.
  void UpdateDebugInfo(const int param_id);

  /// @brief The network name
  string name_;
  /// @brief The phase: TRAIN or TEST
  Phase phase_;
  /// @brief Individual layers in the net
  vector<shared_ptr<Layer<Dtype> > > layers_;
  vector<string> layer_names_;
  map<string, int> layer_names_index_;
  vector<bool> layer_need_backward_;
  /// @brief the blobs storing intermediate results between the layers.
  vector<shared_ptr<Blob<Dtype> > > blobs_;
  vector<string> blob_names_;
  map<string, int> blob_names_index_;
  vector<bool> blob_need_backward_;
  /// bottom_vecs stores the vectors containing the input for each layer.
  /// They don't actually host the blobs (blobs_ does), so we simply store
  /// pointers.
  vector<vector<Blob<Dtype>*> > bottom_vecs_;
  vector<vector<int> > bottom_id_vecs_;
  vector<vector<bool> > bottom_need_backward_;
  /// top_vecs stores the vectors containing the output for each layer
  vector<vector<Blob<Dtype>*> > top_vecs_;
  vector<vector<int> > top_id_vecs_;
  /// Vector of weights in the loss (or objective) function of each net blob,
  /// indexed by blob_id.
  vector<Dtype> blob_loss_weights_;
  vector<vector<int> > param_id_vecs_;
  vector<int> param_owners_;
  vector<string> param_display_names_;
  vector<pair<int, int> > param_layer_indices_;
  map<string, int> param_names_index_;
  /// blob indices for the input and the output of the net
  vector<int> net_input_blob_indices_;
  vector<int> net_output_blob_indices_;
  vector<Blob<Dtype>*> net_input_blobs_;
  vector<Blob<Dtype>*> net_output_blobs_;
  /// The parameters in the network.
  vector<shared_ptr<Blob<Dtype> > > params_;
  vector<Blob<Dtype>*> learnable_params_;
  /**
   * The mapping from params_ -> learnable_params_: we have
   * learnable_param_ids_.size() == params_.size(),
   * and learnable_params_[learnable_param_ids_[i]] == params_[i].get()
   * if and only if params_[i] is an "owner"; otherwise, params_[i] is a
   * sharer and learnable_params_[learnable_param_ids_[i]] gives its owner.
   */
  vector<int> learnable_param_ids_;
  /// the learning rate multipliers for learnable_params_
  vector<float> params_lr_;
  vector<bool> has_params_lr_;
  /// the weight decay multipliers for learnable_params_
  vector<float> params_weight_decay_;
  vector<bool> has_params_decay_;
  /// The bytes of memory used by this net
  size_t memory_used_;
  /// Whether to compute and display debug info for the net.
  bool debug_info_;
  /// The root net that actually holds the shared layers in data parallelism
  const Net* const root_net_;
  DISABLE_COPY_AND_ASSIGN(Net);
};

}  // namespace caffe

#endif  // CAFFE_NET_HPP_
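// An end-to-end usage sketch (hedged: the prototxt and caffemodel file names
// below are assumptions for illustration, not artifacts of this header):
//
//   caffe::Net<float> net("train_val.prototxt", caffe::TRAIN);
//   net.CopyTrainedLayersFrom("pretrained.caffemodel");
//   float loss;
//   const vector<caffe::Blob<float>*>& out = net.ForwardPrefilled(&loss);
//   // inputs, if any, are fed beforehand through net.input_blobs()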