diff options
-rw-r--r-- | caffe2/observers/profile_observer.h | 84 | ||||
-rw-r--r-- | caffe2/observers/profile_observer_gpu.cc | 36 | ||||
-rw-r--r-- | caffe2/observers/profile_observer_test.cc | 81 |
3 files changed, 13 insertions, 188 deletions
diff --git a/caffe2/observers/profile_observer.h b/caffe2/observers/profile_observer.h index b0cd04df77..ab110da088 100644 --- a/caffe2/observers/profile_observer.h +++ b/caffe2/observers/profile_observer.h @@ -31,11 +31,9 @@ namespace caffe2 { /** * This observer displays a description of each operator executed in a network. - * This includes input and tensors (name, size, type), arguments, analytical - *cost and execution time. This can be used to analyze different performance - *characteristics. NOTE: Currently this observer only supports synchronized - *computation. And for RNN, --caffe2_rnn_executor=False need to be set if want - *to get the cost summary at the net level. + * This includes input and tensors (name, size, type), arguments, and execution + * time. This can be used to analyze different performance characteristics. + * NOTE: Currently this observer only supports synchronized computation **/ class ProfileObserver; @@ -54,30 +52,21 @@ class ProfileCounter { class ProfileOperatorObserver : public ProfileCounter, public ObserverBase<OperatorBase> { public: - struct DetailedStat { - string opType; - struct OpSchema::Cost c; - }; explicit ProfileOperatorObserver(OperatorBase* subject) = delete; explicit ProfileOperatorObserver( OperatorBase* subject, - DetailedStat* stat, ProfileObserver* netObserver) - : ObserverBase<OperatorBase>(subject), - stat_(stat), - netObserver_(netObserver) { - stat->opType = subject->debug_def().type(); + : ObserverBase<OperatorBase>(subject), netObserver_(netObserver) { if (subject) { net_position_ = subject->net_position(); } } explicit ProfileOperatorObserver( OperatorBase* subject, - DetailedStat* stat, ProfileObserver* netObserver, int net_position, int rnn_order) - : ProfileOperatorObserver(subject, stat, netObserver) { + : ProfileOperatorObserver(subject, netObserver) { net_position_ = net_position; rnn_order_ = rnn_order; } @@ -97,37 +86,7 @@ class ProfileOperatorObserver : public ProfileCounter, return ss.str(); } - OpSchema::Cost getOpCost() { - const string& op_type = subject_->debug_def().type(); - auto* schema = OpSchemaRegistry::Schema(op_type); - OpSchema::Cost cost; - if (schema && schema->HasCostInferenceFunction()) { - vector<TensorShape> shapes = subject_->InputTensorShapes(); - - auto known_shapes = std::accumulate( - shapes.begin(), - shapes.end(), - true, - [](bool acc, const TensorShape& shape) { - return acc && !shape.unknown_shape(); - }); - if (known_shapes) { - cost = schema->InferCost(subject_->debug_def(), shapes); - } - } - return cost; - } - - void updateDetailedStat(const OpSchema::Cost cost) { - stat_->c.flops += cost.flops; - stat_->c.bytes_read += cost.bytes_read; - stat_->c.bytes_written += cost.bytes_written; - stat_->c.params_bytes += cost.params_bytes; - } - protected: - DetailedStat* stat_; - OpSchema::Cost cost_; ProfileObserver* netObserver_; int net_position_; // Needed because this is not visible in RNN Executor int rnn_order_ = OperatorBase::kNoNetPositionSet; @@ -137,41 +96,20 @@ class ProfileOperatorObserver : public ProfileCounter, void Stop() override; }; -class ProfileObserver final : public ObserverBase<NetBase> { +class ProfileObserver final : public OperatorAttachingNetObserver< + ProfileOperatorObserver, + ProfileObserver> { public: explicit ProfileObserver(NetBase* subject) - : ObserverBase<NetBase>(subject), - detailedOpStats_(subject->GetOperators().size()), - net_name_(subject->Name()) { - const auto& ops = subject->GetOperators(); - for (int i = 0; i < ops.size(); i++) { - ops[i]->AttachObserver(caffe2::make_unique<ProfileOperatorObserver>( - ops[i], &detailedOpStats_[i], this)); - } - } - ~ProfileObserver(); - CaffeMap<string, OpSchema::Cost> getAggregatedOpTypeCost() const { - CaffeMap<string, OpSchema::Cost> cost_per_op_type; - for (int idx = 0; idx < detailedOpStats_.size(); ++idx) { - const auto& stat = detailedOpStats_[idx]; - uint64_t flops = stat.c.flops; - uint64_t bytes_read = stat.c.bytes_read; - uint64_t bytes_written = stat.c.bytes_written; - - cost_per_op_type[stat.opType].flops += flops; - cost_per_op_type[stat.opType].bytes_read += bytes_read; - cost_per_op_type[stat.opType].bytes_written += bytes_written; - } - return cost_per_op_type; - } + : OperatorAttachingNetObserver<ProfileOperatorObserver, ProfileObserver>( + subject, + this) {} void Start() override{}; void Stop() override{}; private: vector<const ProfileOperatorObserver*> operator_observers_; - std::vector<ProfileOperatorObserver::DetailedStat> detailedOpStats_; - std::string net_name_; }; } // namespace caffe2 diff --git a/caffe2/observers/profile_observer_gpu.cc b/caffe2/observers/profile_observer_gpu.cc index 518801a895..5bd9b0a11b 100644 --- a/caffe2/observers/profile_observer_gpu.cc +++ b/caffe2/observers/profile_observer_gpu.cc @@ -58,10 +58,7 @@ void ProfileOperatorObserver::Dump() const { LOG(INFO) << "Output " << o << ": " << printer.MetaStr(*tensor); } } - LOG(INFO) << "Cost (flops, bytes_read, bytes_written, op_type):"; - LOG(INFO) << std::setw(15) << std::setfill(' ') << cost_.flops << " " - << cost_.bytes_read << " " << cost_.bytes_written << " " - << subject_->debug_def().type(); + LOG(INFO) << "--------- Finished operator " << subject_->debug_def().type() << " in " << run_time_ << " ms ---------"; } @@ -85,9 +82,6 @@ void ProfileOperatorObserver::Start() { } } else { start_time_ = timer_.MilliSeconds(); - - cost_ = getOpCost(); - updateDetailedStat(cost_); } } @@ -124,32 +118,6 @@ std::unique_ptr<ObserverBase<OperatorBase>> ProfileOperatorObserver::rnnCopy( int rnn_order) const { return std::unique_ptr<ObserverBase<OperatorBase>>( new ProfileOperatorObserver( - subject, stat_, netObserver_, net_position_, rnn_order)); + subject, netObserver_, net_position_, rnn_order)); } - -ProfileObserver::~ProfileObserver() { - static std::mutex loggingMutex; - std::lock_guard<std::mutex> lock(loggingMutex); - - CaffeMap<string, OpSchema::Cost> cost_per_op_type = getAggregatedOpTypeCost(); - // sort by decreasing flops. - std::vector<std::pair<std::string, OpSchema::Cost>> cost_per_op_type_vec( - cost_per_op_type.begin(), cost_per_op_type.end()); - std::sort( - cost_per_op_type_vec.begin(), - cost_per_op_type_vec.end(), - [](const std::pair<std::string, OpSchema::Cost>& left, - const std::pair<std::string, OpSchema::Cost>& right) { - return left.second.flops > right.second.flops; - }); - LOG(INFO) << "================ Detailed stats for net " << net_name_ - << " ================"; - LOG(INFO) << "Aggregated Cost (flops, bytes_read, bytes_written, op_type):"; - for (const auto& item : cost_per_op_type_vec) { - LOG(INFO) << std::setw(15) << std::setfill(' ') << item.second.flops << " " - << item.second.bytes_read << " " << item.second.bytes_written - << " " << item.first; - } -} - } // namespace caffe2 diff --git a/caffe2/observers/profile_observer_test.cc b/caffe2/observers/profile_observer_test.cc deleted file mode 100644 index 6a98695cec..0000000000 --- a/caffe2/observers/profile_observer_test.cc +++ /dev/null @@ -1,81 +0,0 @@ -#include "caffe2/core/common.h" -#include "caffe2/core/net.h" -#include "caffe2/core/observer.h" -#include "caffe2/core/operator.h" -#include "profile_observer.h" - -#include <gtest/gtest.h> -#include "caffe2/utils/proto_utils.h" - -namespace caffe2 { - -namespace { - -OperatorDef* add_op( - const vector<string>& input, - const vector<string>& output, - const string& type, - NetDef* net) { - CHECK(net); - auto& op = *net->add_op(); - op.set_type(type); - for (const auto& in : input) { - op.add_input(in); - } - for (const auto& out : output) { - op.add_output(out); - } - - return net->mutable_op(net->op_size() - 1); -} - -void fill_tensor( - const vector<int64_t>& shape, - const vector<float>& data, - TensorCPU* tensor) { - tensor->Resize(shape); - CAFFE_ENFORCE_EQ(data.size(), tensor->size()); - auto ptr = tensor->mutable_data<float>(); - for (int i = 0; i < tensor->size(); ++i) { - ptr[i] = data[i]; - } -} - -void add_blob( - const string& name, - const vector<int64_t>& shape, - const vector<float>& data, - Workspace* ws) { - auto* blob = ws->CreateBlob(name); - fill_tensor(shape, data, BlobGetMutableTensor(blob, CPU)); -} - -} // namespace - -TEST(ProfileObserverTest, TestFC) { - Workspace ws; - auto create_net_def = [&ws](int M, int N, int K) { - auto net_def = std::make_shared<NetDef>(); - net_def->set_name("test"); - add_op({"X", "W", "b"}, {"Y"}, "FC", net_def.get()); - add_blob("W", {N, K}, vector<float>(N * K), &ws); - add_blob("b", {N}, vector<float>(N), &ws); - add_blob("X", {M, K}, vector<float>(M * K), &ws); - return net_def; - }; - - int M = 2, N = 3, K = 4; - NetBase* net = ws.CreateNet(create_net_def(M, N, K), true /*overwrite*/); - auto net_ob = caffe2::make_unique<ProfileObserver>(net); - const auto* ob = net_ob.get(); - auto* ref = net->AttachObserver(std::move(net_ob)); - net->Run(); - CAFFE_ENFORCE(ob); - auto cost_per_op_type = ob->getAggregatedOpTypeCost(); - CAFFE_ENFORCE(cost_per_op_type["FC"].flops == M * N * (2 * K + 1)); - CAFFE_ENFORCE( - cost_per_op_type["FC"].bytes_read == (K * (M + N) + N) * sizeof(float)); - CAFFE_ENFORCE(cost_per_op_type["FC"].bytes_written == M * N * sizeof(float)); - net->DetachObserver(ref); -} -} // namespace caffe2 |