// Copyright Yangqing Jia 2013

#include <map>
#include <set>
#include <string>
#include <vector>

#include "caffe/proto/caffe.pb.h"
#include "caffe/layer.hpp"
#include "caffe/net.hpp"

using std::pair;
using std::map;
using std::set;

namespace caffe {

template <typename Dtype>
Net<Dtype>::Net(const NetParameter& param,
    const vector<Blob<Dtype>* >& bottom) {
  // Basically, build all the layers and set up their connections.
  name_ = param.name();
  map<string, int> blob_name_to_idx;
  set<string> available_blobs;
  int num_layers = param.layers_size();
  CHECK_EQ(bottom.size(), param.input_size())
      << "Incorrect bottom blob size.";
  // Set the input blobs.
  for (int i = 0; i < param.input_size(); ++i) {
    const string& blob_name = param.input(i);
    CHECK_GT(bottom[i]->count(), 0);
    shared_ptr<Blob<Dtype> > blob_pointer(
        new Blob<Dtype>(bottom[i]->num(), bottom[i]->channels(),
            bottom[i]->height(), bottom[i]->width()));
    blobs_.push_back(blob_pointer);
    blob_names_.push_back(blob_name);
    blob_need_backward_.push_back(false);
    net_input_blob_indices_.push_back(i);
    blob_name_to_idx[blob_name] = i;
    available_blobs.insert(blob_name);
  }
  // For each layer, set up its input and output blobs.
  bottom_vecs_.resize(param.layers_size());
  top_vecs_.resize(param.layers_size());
  bottom_id_vecs_.resize(param.layers_size());
  top_id_vecs_.resize(param.layers_size());
  for (int i = 0; i < param.layers_size(); ++i) {
    const LayerConnection& layer_connection = param.layers(i);
    const LayerParameter& layer_param = layer_connection.layer();
    layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
    layer_names_.push_back(layer_param.name());
    LOG(INFO) << "Creating Layer " << layer_param.name();
    bool need_backward = false;
    // Figure out this layer's input and output.
    for (int j = 0; j < layer_connection.bottom_size(); ++j) {
      const string& blob_name = layer_connection.bottom(j);
      const int blob_id = blob_name_to_idx[blob_name];
      if (available_blobs.find(blob_name) == available_blobs.end()) {
        LOG(FATAL) << "Unknown blob input " << blob_name
            << " to layer " << j;
      }
      LOG(INFO) << layer_param.name() << " <- " << blob_name;
      bottom_vecs_[i].push_back(blobs_[blob_id].get());
      bottom_id_vecs_[i].push_back(blob_id);
      // If a blob needs backward, this layer should provide it.
      need_backward |= blob_need_backward_[blob_id];
      available_blobs.erase(blob_name);
    }
    for (int j = 0; j < layer_connection.top_size(); ++j) {
      const string& blob_name = layer_connection.top(j);
      // Check if we are doing in-place computation.
      if (layer_connection.bottom_size() > j &&
          blob_name == layer_connection.bottom(j)) {
        // In-place computation.
        LOG(INFO) << layer_param.name() << " -> " << blob_name
            << " (in-place)";
        available_blobs.insert(blob_name);
        top_vecs_[i].push_back(blobs_[blob_name_to_idx[blob_name]].get());
        top_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
      } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
        // If we are not doing in-place computation but have duplicated blobs,
        // raise an error.
        LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
      } else {
        // Normal output.
        LOG(INFO) << layer_param.name() << " -> " << blob_name;
        shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
        blobs_.push_back(blob_pointer);
        blob_names_.push_back(blob_name);
        blob_need_backward_.push_back(false);
        blob_name_to_idx[blob_name] = blob_names_.size() - 1;
        available_blobs.insert(blob_name);
        top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
        top_id_vecs_[i].push_back(blob_names_.size() - 1);
      }
    }
    // After this layer is connected, set it up.
// LOG(INFO) << "Setting up " << layer_names_[i]; layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]); for (int topid = 0; topid < top_vecs_[i].size(); ++topid) { LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " " << top_vecs_[i][topid]->height() << " " << top_vecs_[i][topid]->width(); } // Check if this layer needs backward operation itself for (int j = 0; j < layers_[i]->layer_param().blobs_lr_size(); ++j) { need_backward |= (layers_[i]->layer_param().blobs_lr(j) > 0); } // Finally, set the backward flag layer_need_backward_.push_back(need_backward); if (need_backward) { LOG(INFO) << layer_names_[i] << " needs backward computation."; for (int j = 0; j < top_id_vecs_[i].size(); ++j) { blob_need_backward_[top_id_vecs_[i][j]] = true; } } else { LOG(INFO) << layer_names_[i] << " does not need backward computation."; } } // In the end, all remaining blobs are considered output blobs. for (set::iterator it = available_blobs.begin(); it != available_blobs.end(); ++it) { LOG(INFO) << "This network produces output " << *it; net_output_blob_indices_.push_back(blob_name_to_idx[*it]); net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get()); } GetLearningRateAndWeightDecay(); LOG(INFO) << "Network initialization done."; } template void Net::GetLearningRateAndWeightDecay() { LOG(INFO) << "Collecting Learning Rate and Weight Decay."; for (int i = 0; i < layers_.size(); ++i) { vector > >& layer_blobs = layers_[i]->blobs(); for (int j = 0; j < layer_blobs.size(); ++j) { params_.push_back(layer_blobs[j]); } // push the learning rate mutlipliers if (layers_[i]->layer_param().blobs_lr_size()) { CHECK_EQ(layers_[i]->layer_param().blobs_lr_size(), layer_blobs.size()); for (int j = 0; j < layer_blobs.size(); ++j) { float local_lr = layers_[i]->layer_param().blobs_lr(j); CHECK_GE(local_lr, 0.); params_lr_.push_back(local_lr); } } else { for (int j = 0; j < layer_blobs.size(); ++j) { params_lr_.push_back(1.); } } // push the weight decay multipliers if (layers_[i]->layer_param().weight_decay_size()) { CHECK_EQ(layers_[i]->layer_param().weight_decay_size(), layer_blobs.size()); for (int j = 0; j < layer_blobs.size(); ++j) { float local_decay = layers_[i]->layer_param().weight_decay(j); CHECK_GE(local_decay, 0.); params_weight_decay_.push_back(local_decay); } } else { for (int j = 0; j < layer_blobs.size(); ++j) { params_weight_decay_.push_back(1.); } } } } template const vector*>& Net::Forward( const vector*> & bottom) { // Copy bottom to internal bottom for (int i = 0; i < bottom.size(); ++i) { blobs_[net_input_blob_indices_[i]]->CopyFrom(*bottom[i]); } for (int i = 0; i < layers_.size(); ++i) { // LOG(ERROR) << "Forwarding " << layer_names_[i]; layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]); } return net_output_blobs_; } template Dtype Net::Backward() { Dtype loss = 0; for (int i = layers_.size() - 1; i >= 0; --i) { if (layer_need_backward_[i]) { Dtype layer_loss = layers_[i]->Backward( top_vecs_[i], true, &bottom_vecs_[i]); loss += layer_loss; } } return loss; } template void Net::CopyTrainedLayersFrom(const NetParameter& param) { int num_source_layers = param.layers_size(); for (int i = 0; i < num_source_layers; ++i) { const LayerParameter& source_layer = param.layers(i).layer(); const string& source_layer_name = source_layer.name(); int target_layer_id = 0; while (target_layer_id != layer_names_.size() && layer_names_[target_layer_id] != source_layer_name) { ++target_layer_id; } if (target_layer_id == layer_names_.size()) { DLOG(INFO) << "Ignoring source layer " << 

template <typename Dtype>
void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
  int num_source_layers = param.layers_size();
  for (int i = 0; i < num_source_layers; ++i) {
    const LayerParameter& source_layer = param.layers(i).layer();
    const string& source_layer_name = source_layer.name();
    int target_layer_id = 0;
    while (target_layer_id != layer_names_.size() &&
        layer_names_[target_layer_id] != source_layer_name) {
      ++target_layer_id;
    }
    if (target_layer_id == layer_names_.size()) {
      DLOG(INFO) << "Ignoring source layer " << source_layer_name;
      continue;
    }
    DLOG(INFO) << "Loading source layer " << source_layer_name;
    vector<shared_ptr<Blob<Dtype> > >& target_blobs =
        layers_[target_layer_id]->blobs();
    CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
        << "Incompatible number of blobs for layer " << source_layer_name;
    for (int j = 0; j < target_blobs.size(); ++j) {
      CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num());
      CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels());
      CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height());
      CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width());
      target_blobs[j]->FromProto(source_layer.blobs(j));
    }
  }
}

template <typename Dtype>
void Net<Dtype>::ToProto(NetParameter* param, bool write_diff) {
  param->Clear();
  param->set_name(name_);
  // Add bottom and top blob names.
  for (int i = 0; i < net_input_blob_indices_.size(); ++i) {
    param->add_input(blob_names_[net_input_blob_indices_[i]]);
  }
  DLOG(INFO) << "Serializing " << layers_.size() << " layers";
  for (int i = 0; i < layers_.size(); ++i) {
    LayerConnection* layer_connection = param->add_layers();
    for (int j = 0; j < bottom_id_vecs_[i].size(); ++j) {
      layer_connection->add_bottom(blob_names_[bottom_id_vecs_[i][j]]);
    }
    for (int j = 0; j < top_id_vecs_[i].size(); ++j) {
      layer_connection->add_top(blob_names_[top_id_vecs_[i][j]]);
    }
    LayerParameter* layer_parameter = layer_connection->mutable_layer();
    layers_[i]->ToProto(layer_parameter, write_diff);
  }
}

template <typename Dtype>
void Net<Dtype>::Update() {
  for (int i = 0; i < params_.size(); ++i) {
    params_[i]->Update();
  }
}

INSTANTIATE_CLASS(Net);

}  // namespace caffe