summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- caffe2/observers/profile_observer.h 84
-rw-r--r-- caffe2/observers/profile_observer_gpu.cc 36
-rw-r--r-- caffe2/observers/profile_observer_test.cc 81
3 files changed, 13 insertions, 188 deletions
diff --git a/caffe2/observers/profile_observer.h b/caffe2/observers/profile_observer.h
index b0cd04df77..ab110da088 100644
--- a/caffe2/observers/profile_observer.h
+++ b/caffe2/observers/profile_observer.h
@@ -31,11 +31,9 @@ namespace caffe2 {
/**
* This observer displays a description of each operator executed in a network.
- * This includes input and tensors (name, size, type), arguments, analytical
- *cost and execution time. This can be used to analyze different performance
- *characteristics. NOTE: Currently this observer only supports synchronized
- *computation. And for RNN, --caffe2_rnn_executor=False need to be set if want
- *to get the cost summary at the net level.
+ * This includes input and output tensors (name, size, type), arguments, and execution
+ * time. This can be used to analyze different performance characteristics.
+ * NOTE: Currently this observer only supports synchronized computation.
**/
class ProfileObserver;
@@ -54,30 +52,21 @@ class ProfileCounter {
class ProfileOperatorObserver : public ProfileCounter,
public ObserverBase<OperatorBase> {
public:
- struct DetailedStat {
- string opType;
- struct OpSchema::Cost c;
- };
explicit ProfileOperatorObserver(OperatorBase* subject) = delete;
explicit ProfileOperatorObserver(
OperatorBase* subject,
- DetailedStat* stat,
ProfileObserver* netObserver)
- : ObserverBase<OperatorBase>(subject),
- stat_(stat),
- netObserver_(netObserver) {
- stat->opType = subject->debug_def().type();
+ : ObserverBase<OperatorBase>(subject), netObserver_(netObserver) {
if (subject) {
net_position_ = subject->net_position();
}
}
explicit ProfileOperatorObserver(
OperatorBase* subject,
- DetailedStat* stat,
ProfileObserver* netObserver,
int net_position,
int rnn_order)
- : ProfileOperatorObserver(subject, stat, netObserver) {
+ : ProfileOperatorObserver(subject, netObserver) {
net_position_ = net_position;
rnn_order_ = rnn_order;
}
@@ -97,37 +86,7 @@ class ProfileOperatorObserver : public ProfileCounter,
return ss.str();
}
- OpSchema::Cost getOpCost() {
- const string& op_type = subject_->debug_def().type();
- auto* schema = OpSchemaRegistry::Schema(op_type);
- OpSchema::Cost cost;
- if (schema && schema->HasCostInferenceFunction()) {
- vector<TensorShape> shapes = subject_->InputTensorShapes();
-
- auto known_shapes = std::accumulate(
- shapes.begin(),
- shapes.end(),
- true,
- [](bool acc, const TensorShape& shape) {
- return acc && !shape.unknown_shape();
- });
- if (known_shapes) {
- cost = schema->InferCost(subject_->debug_def(), shapes);
- }
- }
- return cost;
- }
-
- void updateDetailedStat(const OpSchema::Cost cost) {
- stat_->c.flops += cost.flops;
- stat_->c.bytes_read += cost.bytes_read;
- stat_->c.bytes_written += cost.bytes_written;
- stat_->c.params_bytes += cost.params_bytes;
- }
-
protected:
- DetailedStat* stat_;
- OpSchema::Cost cost_;
ProfileObserver* netObserver_;
int net_position_; // Needed because this is not visible in RNN Executor
int rnn_order_ = OperatorBase::kNoNetPositionSet;
@@ -137,41 +96,20 @@ class ProfileOperatorObserver : public ProfileCounter,
void Stop() override;
};
-class ProfileObserver final : public ObserverBase<NetBase> {
+class ProfileObserver final : public OperatorAttachingNetObserver<
+ ProfileOperatorObserver,
+ ProfileObserver> {
public:
explicit ProfileObserver(NetBase* subject)
- : ObserverBase<NetBase>(subject),
- detailedOpStats_(subject->GetOperators().size()),
- net_name_(subject->Name()) {
- const auto& ops = subject->GetOperators();
- for (int i = 0; i < ops.size(); i++) {
- ops[i]->AttachObserver(caffe2::make_unique<ProfileOperatorObserver>(
- ops[i], &detailedOpStats_[i], this));
- }
- }
- ~ProfileObserver();
- CaffeMap<string, OpSchema::Cost> getAggregatedOpTypeCost() const {
- CaffeMap<string, OpSchema::Cost> cost_per_op_type;
- for (int idx = 0; idx < detailedOpStats_.size(); ++idx) {
- const auto& stat = detailedOpStats_[idx];
- uint64_t flops = stat.c.flops;
- uint64_t bytes_read = stat.c.bytes_read;
- uint64_t bytes_written = stat.c.bytes_written;
-
- cost_per_op_type[stat.opType].flops += flops;
- cost_per_op_type[stat.opType].bytes_read += bytes_read;
- cost_per_op_type[stat.opType].bytes_written += bytes_written;
- }
- return cost_per_op_type;
- }
+ : OperatorAttachingNetObserver<ProfileOperatorObserver, ProfileObserver>(
+ subject,
+ this) {}
void Start() override{};
void Stop() override{};
private:
vector<const ProfileOperatorObserver*> operator_observers_;
- std::vector<ProfileOperatorObserver::DetailedStat> detailedOpStats_;
- std::string net_name_;
};
} // namespace caffe2
diff --git a/caffe2/observers/profile_observer_gpu.cc b/caffe2/observers/profile_observer_gpu.cc
index 518801a895..5bd9b0a11b 100644
--- a/caffe2/observers/profile_observer_gpu.cc
+++ b/caffe2/observers/profile_observer_gpu.cc
@@ -58,10 +58,7 @@ void ProfileOperatorObserver::Dump() const {
LOG(INFO) << "Output " << o << ": " << printer.MetaStr(*tensor);
}
}
- LOG(INFO) << "Cost (flops, bytes_read, bytes_written, op_type):";
- LOG(INFO) << std::setw(15) << std::setfill(' ') << cost_.flops << " "
- << cost_.bytes_read << " " << cost_.bytes_written << " "
- << subject_->debug_def().type();
+
LOG(INFO) << "--------- Finished operator " << subject_->debug_def().type()
<< " in " << run_time_ << " ms ---------";
}
@@ -85,9 +82,6 @@ void ProfileOperatorObserver::Start() {
}
} else {
start_time_ = timer_.MilliSeconds();
-
- cost_ = getOpCost();
- updateDetailedStat(cost_);
}
}
@@ -124,32 +118,6 @@ std::unique_ptr<ObserverBase<OperatorBase>> ProfileOperatorObserver::rnnCopy(
int rnn_order) const {
return std::unique_ptr<ObserverBase<OperatorBase>>(
new ProfileOperatorObserver(
- subject, stat_, netObserver_, net_position_, rnn_order));
+ subject, netObserver_, net_position_, rnn_order));
}
-
-ProfileObserver::~ProfileObserver() {
- static std::mutex loggingMutex;
- std::lock_guard<std::mutex> lock(loggingMutex);
-
- CaffeMap<string, OpSchema::Cost> cost_per_op_type = getAggregatedOpTypeCost();
- // sort by decreasing flops.
- std::vector<std::pair<std::string, OpSchema::Cost>> cost_per_op_type_vec(
- cost_per_op_type.begin(), cost_per_op_type.end());
- std::sort(
- cost_per_op_type_vec.begin(),
- cost_per_op_type_vec.end(),
- [](const std::pair<std::string, OpSchema::Cost>& left,
- const std::pair<std::string, OpSchema::Cost>& right) {
- return left.second.flops > right.second.flops;
- });
- LOG(INFO) << "================ Detailed stats for net " << net_name_
- << " ================";
- LOG(INFO) << "Aggregated Cost (flops, bytes_read, bytes_written, op_type):";
- for (const auto& item : cost_per_op_type_vec) {
- LOG(INFO) << std::setw(15) << std::setfill(' ') << item.second.flops << " "
- << item.second.bytes_read << " " << item.second.bytes_written
- << " " << item.first;
- }
-}
-
} // namespace caffe2
diff --git a/caffe2/observers/profile_observer_test.cc b/caffe2/observers/profile_observer_test.cc
deleted file mode 100644
index 6a98695cec..0000000000
--- a/caffe2/observers/profile_observer_test.cc
+++ /dev/null
@@ -1,81 +0,0 @@
-#include "caffe2/core/common.h"
-#include "caffe2/core/net.h"
-#include "caffe2/core/observer.h"
-#include "caffe2/core/operator.h"
-#include "profile_observer.h"
-
-#include <gtest/gtest.h>
-#include "caffe2/utils/proto_utils.h"
-
-namespace caffe2 {
-
-namespace {
-
-OperatorDef* add_op(
- const vector<string>& input,
- const vector<string>& output,
- const string& type,
- NetDef* net) {
- CHECK(net);
- auto& op = *net->add_op();
- op.set_type(type);
- for (const auto& in : input) {
- op.add_input(in);
- }
- for (const auto& out : output) {
- op.add_output(out);
- }
-
- return net->mutable_op(net->op_size() - 1);
-}
-
-void fill_tensor(
- const vector<int64_t>& shape,
- const vector<float>& data,
- TensorCPU* tensor) {
- tensor->Resize(shape);
- CAFFE_ENFORCE_EQ(data.size(), tensor->size());
- auto ptr = tensor->mutable_data<float>();
- for (int i = 0; i < tensor->size(); ++i) {
- ptr[i] = data[i];
- }
-}
-
-void add_blob(
- const string& name,
- const vector<int64_t>& shape,
- const vector<float>& data,
- Workspace* ws) {
- auto* blob = ws->CreateBlob(name);
- fill_tensor(shape, data, BlobGetMutableTensor(blob, CPU));
-}
-
-} // namespace
-
-TEST(ProfileObserverTest, TestFC) {
- Workspace ws;
- auto create_net_def = [&ws](int M, int N, int K) {
- auto net_def = std::make_shared<NetDef>();
- net_def->set_name("test");
- add_op({"X", "W", "b"}, {"Y"}, "FC", net_def.get());
- add_blob("W", {N, K}, vector<float>(N * K), &ws);
- add_blob("b", {N}, vector<float>(N), &ws);
- add_blob("X", {M, K}, vector<float>(M * K), &ws);
- return net_def;
- };
-
- int M = 2, N = 3, K = 4;
- NetBase* net = ws.CreateNet(create_net_def(M, N, K), true /*overwrite*/);
- auto net_ob = caffe2::make_unique<ProfileObserver>(net);
- const auto* ob = net_ob.get();
- auto* ref = net->AttachObserver(std::move(net_ob));
- net->Run();
- CAFFE_ENFORCE(ob);
- auto cost_per_op_type = ob->getAggregatedOpTypeCost();
- CAFFE_ENFORCE(cost_per_op_type["FC"].flops == M * N * (2 * K + 1));
- CAFFE_ENFORCE(
- cost_per_op_type["FC"].bytes_read == (K * (M + N) + N) * sizeof(float));
- CAFFE_ENFORCE(cost_per_op_type["FC"].bytes_written == M * N * sizeof(float));
- net->DetachObserver(ref);
-}
-} // namespace caffe2