summaryrefslogtreecommitdiff
path: root/caffe2
diff options
context:
space:
mode:
authorYinghai Lu <yinghai@fb.com>2019-02-07 14:11:44 -0800
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>2019-02-07 14:51:32 -0800
commit0f42a1ed292db3e2951ceae3abb1eeec8d909aad (patch)
tree0b94ce34997a9501a7fb8887f2e093b35bba73c2 /caffe2
parent66084c0bc92d3624738c829af60b49766a905090 (diff)
downloadpytorch-0f42a1ed292db3e2951ceae3abb1eeec8d909aad.tar.gz
pytorch-0f42a1ed292db3e2951ceae3abb1eeec8d909aad.tar.bz2
pytorch-0f42a1ed292db3e2951ceae3abb1eeec8d909aad.zip
Use bound shape inference in SparseNN tests (#16834)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/16834 Inserting AdjustBatch ops will possibly change the names of the input/output, so we need to create a mapping and use the renamed names for external_inputs/outputs and input_shape_info for the onnxifi_net. Reviewed By: ipiszy Differential Revision: D13982731 fbshipit-source-id: c18b8a03d01490162929b2ca30c182d166001626
Diffstat (limited to 'caffe2')
-rw-r--r--caffe2/opt/bound_shape_inferencer.cc6
-rw-r--r--caffe2/opt/onnxifi_transformer.cc212
-rw-r--r--caffe2/opt/onnxifi_transformer.h3
3 files changed, 138 insertions, 83 deletions
diff --git a/caffe2/opt/bound_shape_inferencer.cc b/caffe2/opt/bound_shape_inferencer.cc
index 1ea16098f6..166532d8bd 100644
--- a/caffe2/opt/bound_shape_inferencer.cc
+++ b/caffe2/opt/bound_shape_inferencer.cc
@@ -47,7 +47,7 @@ void BoundShapeInferencer::InferBoundShapeAndType(
visited_tensors_.clear();
for (const auto& op : net.op()) {
- LOG(INFO) << op.type();
+ VLOG(1) << op.type();
if (op.type() == "SparseLengthsSum" ||
op.type() == "SparseLengthsSumFused8BitRowwise") {
InferSparseLengthsSum(op);
@@ -215,6 +215,10 @@ void BoundShapeInferencer::InferConcat(const OperatorDef& op) {
}
}
InferCommonOp(op);
+ // split_info should be a constant
+ if (op.output_size() > 1) {
+ shape_info_[op.output(1)].dim_type = ShapeInfo::DimType::CONSTANT;
+ }
}
void BoundShapeInferencer::InferFC(const OperatorDef& op) {
diff --git a/caffe2/opt/onnxifi_transformer.cc b/caffe2/opt/onnxifi_transformer.cc
index 0eaea78578..7b8ee3404f 100644
--- a/caffe2/opt/onnxifi_transformer.cc
+++ b/caffe2/opt/onnxifi_transformer.cc
@@ -21,6 +21,7 @@ using ShapeInfoMap = std::unordered_map<std::string, ShapeInfo>;
const std::string kNetPos("net_pos");
const std::string kModelId("model_id");
+const std::string kRealBatchSizeBlob("real_batch_size");
constexpr size_t kBufferSize = 64;
void AnnotateOpIndex(NetDef* net) {
@@ -118,8 +119,7 @@ ShapeInfoMap InferShapes(
shape_map.emplace(
std::piecewise_construct,
std::forward_as_tuple(kv.first),
- std::forward_as_tuple(
- ShapeInfo::DimType::CONSTANT, kv.second.shape));
+ std::forward_as_tuple(kv.second.dim_type, kv.second.shape));
}
} else {
// TODO: deprecate this path
@@ -233,27 +233,23 @@ void FillModelInfo(::ONNX_NAMESPACE::ModelProto* model) {
opset_id->set_version(7);
}
-string MkBatchSizeBlob() {
- return "real_batch_size";
-}
-
-string MkSeqSizeBlob(const string& blob_name) {
+std::string MakeSeqSizeBlob(const std::string& blob_name) {
return blob_name + "_real_seq_size";
}
-string MkOutputForAdjustBatchOp(const string& input) {
+std::string MakeOutputForAdjustBatchOp(const std::string& input) {
return input + "_post_adjust_batch";
}
-string MkInputForAdjustBatchOp(const string& output) {
+std::string MakeInputForAdjustBatchOp(const std::string& output) {
return output + "_pre_adjust_batch";
}
-OperatorDef MkAdjustBatchOp(
- const string& input_blob,
- const string& output_blob,
+OperatorDef MakeAdjustBatchOp(
+ const std::string& input_blob,
+ const std::string& output_blob,
int max_batch_size,
- const string& real_batch_size_blob,
+ const std::string& real_batch_size_blob,
bool adjust_to_max_batch_size) {
OperatorDef adjust_batch_op;
adjust_batch_op.set_type("AdjustBatch");
@@ -263,7 +259,9 @@ OperatorDef MkAdjustBatchOp(
adjust_batch_op.add_input(input_blob);
adjust_batch_op.add_output(output_blob);
if (adjust_to_max_batch_size) {
- adjust_batch_op.add_output(real_batch_size_blob);
+ if (!real_batch_size_blob.empty()) {
+ adjust_batch_op.add_output(real_batch_size_blob);
+ }
} else {
adjust_batch_op.add_input(real_batch_size_blob);
}
@@ -290,19 +288,22 @@ int64_t GetBlob1stDimSize(
// Generates AdjustBatchOps for external inputs / outputs with type BATCH or
// SEQ and adds them to input_ops and output_ops.
// Meanwhile, modifies inputs / outputs of corresponding operators in the
-// wrapper_net to use the new inputs / outputs of AdjustBatchOps.
-void AddAdjustBatchOps(
+// onnxifi_net to use the new inputs / outputs of AdjustBatchOps.
+std::unordered_map<std::string, std::string> AddAdjustBatchOps(
const ShapeInfoMap& shape_hints,
- NetDef* wrapper_net,
+ NetDef* onnxifi_net,
vector<OperatorDef>* input_ops,
vector<OperatorDef>* output_ops) {
- const auto external_inputs = ToHashSet(wrapper_net->external_input());
- const auto external_outputs = ToHashSet(wrapper_net->external_output());
+ std::unordered_map<std::string, std::string> renaming_map;
+ const auto external_inputs = ToHashSet(onnxifi_net->external_input());
+ const auto external_outputs = ToHashSet(onnxifi_net->external_output());
+ std::unordered_set<std::string> real_batch_size_blobs;
- for (auto& op : *(wrapper_net->mutable_op())) {
+ for (auto& op : *(onnxifi_net->mutable_op())) {
// Add AdjustBatchOp for all external inputs with type BATCH or SEQ.
// This will adjust the batch/seq size to the batch/seq size inferred by
- // bound_shape_inference.
- bound_shape_inference. Note that we only produce the real batch size
+ tensor once to avoid a data race
for (auto& input_blob : *(op.mutable_input())) {
if (external_inputs.count(input_blob)) {
auto shape_info_it = shape_hints.find(input_blob);
@@ -313,24 +314,27 @@ void AddAdjustBatchOps(
}
string real_batch_size_blob = "";
if (shape_info_it->second.dim_type == ShapeInfo::DimType::BATCH) {
- real_batch_size_blob = MkBatchSizeBlob();
+ real_batch_size_blob = kRealBatchSizeBlob;
} else if (shape_info_it->second.dim_type == ShapeInfo::DimType::SEQ) {
- real_batch_size_blob = MkSeqSizeBlob(input_blob);
+ real_batch_size_blob = MakeSeqSizeBlob(input_blob);
} else {
continue;
}
- auto output_blob = MkOutputForAdjustBatchOp(input_blob);
- input_ops->push_back(MkAdjustBatchOp(
+ auto output_blob = MakeOutputForAdjustBatchOp(input_blob);
+ auto ret = real_batch_size_blobs.emplace(real_batch_size_blob);
+ input_ops->push_back(MakeAdjustBatchOp(
input_blob,
output_blob,
GetBlob1stDimSize(shape_info_it->second, input_blob),
- real_batch_size_blob,
+ ret.second ? real_batch_size_blob : "",
true /* adjust_to_max_batch_size */));
+ renaming_map[input_blob] = output_blob;
input_blob = output_blob;
}
}
- // Add AdjustBatchOp for all external outputs with type BATCH.
- // This will adjust the batch size to the original batch size.
+ // Add AdjustBatchOp for all external outputs with type BATCH if the real
batch size is present. This will adjust the batch size to the original
+ // batch size.
for (auto& output_blob : *(op.mutable_output())) {
if (external_outputs.count(output_blob)) {
auto shape_info_it = shape_hints.find(output_blob);
@@ -338,13 +342,17 @@ void AddAdjustBatchOps(
continue;
}
if (shape_info_it->second.dim_type == ShapeInfo::DimType::BATCH) {
- auto input_blob = MkInputForAdjustBatchOp(output_blob);
- output_ops->push_back(MkAdjustBatchOp(
+ if (!real_batch_size_blobs.count(kRealBatchSizeBlob)) {
+ continue;
+ }
+ auto input_blob = MakeInputForAdjustBatchOp(output_blob);
+ output_ops->push_back(MakeAdjustBatchOp(
input_blob,
output_blob,
GetBlob1stDimSize(shape_info_it->second, output_blob),
- MkBatchSizeBlob(),
+ kRealBatchSizeBlob,
false /* adjust_to_max_batch_size */));
+ renaming_map[output_blob] = input_blob;
output_blob = input_blob;
} else {
CAFFE_ENFORCE(
@@ -355,6 +363,8 @@ void AddAdjustBatchOps(
}
}
}
+
+ return renaming_map;
}
NetDef ComposeResultNet(
@@ -363,12 +373,12 @@ NetDef ComposeResultNet(
const OperatorDef& onnxifi_op) {
NetDef net_opt;
for (const auto& op : input_ops) {
- *(net_opt.add_op()) = op;
+ net_opt.add_op()->CopyFrom(op);
}
- *(net_opt.add_op()) = onnxifi_op;
+ net_opt.add_op()->CopyFrom(onnxifi_op);
// Add AdjustBatch ops for output blobs to the net.
for (const auto& op : output_ops) {
- *(net_opt.add_op()) = op;
+ net_opt.add_op()->CopyFrom(op);
}
return net_opt;
}
@@ -402,7 +412,8 @@ OperatorDef OnnxifiTransformer::BuildOnnxifiOp(
const std::string& onnx_model_str,
const std::unordered_map<std::string, TensorShape>& output_shape_hints,
const std::unordered_set<std::string>& initialization_list,
- const caffe2::NetDef& net) {
+ const std::vector<std::string>& external_inputs,
+ const std::vector<std::string>& external_outputs) {
OperatorDef op;
op.set_type("Onnxifi");
auto* onnx_model_arg = op.add_arg();
@@ -421,7 +432,7 @@ OperatorDef OnnxifiTransformer::BuildOnnxifiOp(
// Add the input/output
auto* input_names = op.add_arg();
input_names->set_name("input_names");
- for (const auto& input : net.external_input()) {
+ for (const auto& input : external_inputs) {
if (!initialization_list.count(input)) {
op.add_input(input);
input_names->add_strings(input);
@@ -429,7 +440,7 @@ OperatorDef OnnxifiTransformer::BuildOnnxifiOp(
}
auto* output_names = op.add_arg();
output_names->set_name("output_names");
- for (const auto& output : net.external_output()) {
+ for (const auto& output : external_outputs) {
op.add_output(output);
output_names->add_strings(output);
}
@@ -469,17 +480,7 @@ NetDef OnnxifiTransformer::SubnetToOnnxifiOpViaC2(
const std::unordered_set<std::string>& weights_in_ws,
const ShapeInfoMap& shape_hints) {
// We already have all the ops and external inputs and outputs!
- NetDef wrapper_net(net);
-
- // Compute output shape hints
- std::unordered_map<std::string, TensorShape> output_shape_hints;
- for (const auto& o : wrapper_net.external_output()) {
- const auto it = shape_hints.find(o);
- CAFFE_ENFORCE(
- it != shape_hints.end(), "Cannot find shape info for output ", o);
- const auto& shape = it->second.shape;
- output_shape_hints.emplace(o, shape);
- }
+ NetDef onnxifi_net(net);
// Remove the second output of Concat from external_output. In addition, we
// remove those outputs from the Onnxifi op too.
@@ -488,54 +489,84 @@ NetDef OnnxifiTransformer::SubnetToOnnxifiOpViaC2(
// where we statically computes the split_info given input shape and insert a
// GivenTensorIntFill op
std::unordered_set<std::string> split_infos;
- NetDef net_copy(net);
- for (auto& op : *wrapper_net.mutable_op()) {
+ for (auto& op : *onnxifi_net.mutable_op()) {
if (op.type() == "Concat" && op.output_size() == 2) {
split_infos.emplace(op.output(1));
}
}
- wrapper_net.clear_external_output();
- net_copy.clear_external_output();
+ onnxifi_net.clear_external_output();
for (const auto& o : net.external_output()) {
if (!split_infos.count(o)) {
- wrapper_net.add_external_output(o);
- net_copy.add_external_output(o);
+ onnxifi_net.add_external_output(o);
}
}
+ // Insert AdjustBatch ops, note that this step will possibly change the names
+ // of the input/output, so we need to create a mapping and use the renamed
+ // names for external_inputs/outputs and input_shape_info for the onnxifi_net.
vector<OperatorDef> input_ops;
vector<OperatorDef> output_ops;
- AddAdjustBatchOps(shape_hints, &wrapper_net, &input_ops, &output_ops);
+ auto renaming_map =
+ AddAdjustBatchOps(shape_hints, &onnxifi_net, &input_ops, &output_ops);
// Figure out weights and add it to external_inputs too
- std::vector<std::string> extra_weights;
std::unordered_set<std::string> initialization_list;
std::vector<std::string> total_inputs_vec;
GetWeightsAndInputs(
net,
weights_in_ws,
- extra_weights,
+ std::vector<std::string>(),
&initialization_list,
&total_inputs_vec);
- auto* shape_arg = wrapper_net.add_arg();
+ auto* shape_arg = onnxifi_net.add_arg();
shape_arg->set_name("input_shape_info");
- wrapper_net.clear_external_input();
+ onnxifi_net.clear_external_input();
for (const auto& i : total_inputs_vec) {
- wrapper_net.add_external_input(i);
+ auto input = i;
+ const auto it = renaming_map.find(i);
+ if (it != renaming_map.end()) {
+ input = it->second;
+ }
+ onnxifi_net.add_external_input(input);
shape_arg->mutable_tensors()->Add()->CopyFrom(
- WrapShapeInfoIntoTensorProto(i, shape_hints.at(i)));
+ WrapShapeInfoIntoTensorProto(input, shape_hints.at(i)));
+ }
+
+ // Compute output shape hints
+ std::unordered_map<std::string, TensorShape> output_shape_hints;
+ for (auto& o : *onnxifi_net.mutable_external_output()) {
+ auto output = o;
+ const auto rit = renaming_map.find(o);
+ if (rit != renaming_map.end()) {
+ output = rit->second;
+ }
+ const auto it = shape_hints.find(o);
+ CAFFE_ENFORCE(
+ it != shape_hints.end(), "Cannot find shape info for output ", o);
+ const auto& shape = it->second.shape;
+ output_shape_hints.emplace(output, shape);
+ o = output;
}
// Build ONNXIFI Op
+ std::vector<std::string> onnxifi_net_inputs(
+ onnxifi_net.external_input().begin(), onnxifi_net.external_input().end());
+ std::vector<std::string> onnxifi_net_outputs(
+ onnxifi_net.external_output().begin(),
+ onnxifi_net.external_output().end());
std::string model_str;
- wrapper_net.SerializeToString(&model_str);
+ onnxifi_net.SerializeToString(&model_str);
auto onnxifi_op = BuildOnnxifiOp(
- model_str, output_shape_hints, initialization_list, net_copy);
+ model_str,
+ output_shape_hints,
+ initialization_list,
+ onnxifi_net_inputs,
+ onnxifi_net_outputs);
NetDef net_opt = ComposeResultNet(input_ops, output_ops, onnxifi_op);
// Debugging stuff
if (opts_.debug) {
- WriteProtoToTextFile(wrapper_net, "debug_wrapper_net.pb_txt");
+ WriteProtoToTextFile(onnxifi_net, "debug_onnxifi_net.pb_txt");
WriteProtoToTextFile(net_opt, "debug_optimized_net.pb_txt");
}
return net_opt;
@@ -551,17 +582,21 @@ NetDef OnnxifiTransformer::SubnetToOnnxifiOpViaOnnx(
::ONNX_NAMESPACE::ModelProto onnx_model;
FillModelInfo(&onnx_model);
- caffe2::NetDef wrapper_net(net);
+ caffe2::NetDef onnxifi_net(net);
vector<OperatorDef> input_ops;
vector<OperatorDef> output_ops;
- AddAdjustBatchOps(*shape_hints, &wrapper_net, &input_ops, &output_ops);
+ auto renaming_map =
+ AddAdjustBatchOps(*shape_hints, &onnxifi_net, &input_ops, &output_ops);
+ for (const auto& kv : renaming_map) {
+ shape_hints_onnx->emplace(kv.second, shape_hints_onnx->at(kv.first));
+ }
// Convert c2 ops to onnx ops, add const weights if there are any
DeviceOption option;
CPUContext context(option);
context.SwitchToDevice();
std::vector<std::string> extra_weights;
- for (const auto& op : net.op()) {
+ for (const auto& op : onnxifi_net.op()) {
const auto results = exporter->Caffe2OpToOnnxNodes(op, *shape_hints_onnx);
for (const auto& n : results.first) {
onnx_model.mutable_graph()->add_node()->CopyFrom(n);
@@ -610,12 +645,17 @@ NetDef OnnxifiTransformer::SubnetToOnnxifiOpViaOnnx(
}
// Convert outputs and compute output shape hints
- std::vector<std::string> io_names;
- for (const auto& output : net.external_output()) {
- io_names.emplace_back(output);
+ std::vector<std::string> onnxifi_net_outputs;
+ for (const auto& o : net.external_output()) {
+ auto output = o;
+ const auto it = renaming_map.find(o);
+ if (it != renaming_map.end()) {
+ output = it->second;
+ }
+ onnxifi_net_outputs.emplace_back(output);
}
auto io_vec = ConvertToValueInfo(
- io_names,
+ onnxifi_net_outputs,
*shape_hints_onnx,
std::unordered_map<std::string, ::ONNX_NAMESPACE::TypeProto>());
std::unordered_map<std::string, TensorShape> output_shape_hints;
@@ -632,33 +672,43 @@ NetDef OnnxifiTransformer::SubnetToOnnxifiOpViaOnnx(
// Convert inputs and figure out weights
std::unordered_set<std::string> initialization_list;
- std::vector<std::string> total_inputs_vec;
+ std::vector<std::string> onnxifi_net_inputs;
GetWeightsAndInputs(
net,
weights_in_ws,
extra_weights,
&initialization_list,
- &total_inputs_vec);
+ &onnxifi_net_inputs);
+ for (auto& i : onnxifi_net_inputs) {
+ const auto it = renaming_map.find(i);
+ if (it != renaming_map.end()) {
+ i = it->second;
+ }
+ }
io_vec = ConvertToValueInfo(
- total_inputs_vec,
+ onnxifi_net_inputs,
*shape_hints_onnx,
std::unordered_map<std::string, ::ONNX_NAMESPACE::TypeProto>());
for (const auto& i : io_vec) {
onnx_model.mutable_graph()->add_input()->CopyFrom(i);
}
- // Debugging stuff
- if (opts_.debug) {
- WriteProtoToTextFile(onnx_model, "debug.onnx_txt");
- }
-
// Onnx model is ready. Build ONNXIFI Op
std::string model_str;
onnx_model.SerializeToString(&model_str);
- auto onnxifi_op =
- BuildOnnxifiOp(model_str, output_shape_hints, initialization_list, net);
+ auto onnxifi_op = BuildOnnxifiOp(
+ model_str,
+ output_shape_hints,
+ initialization_list,
+ onnxifi_net_inputs,
+ onnxifi_net_outputs);
NetDef net_opt = ComposeResultNet(input_ops, output_ops, onnxifi_op);
+ // Debugging stuff
+ if (opts_.debug) {
+ WriteProtoToTextFile(onnx_model, "debug_onnxifi_net.onnx_txt");
+ WriteProtoToTextFile(net_opt, "debug_optimized_net.pb_txt");
+ }
return net_opt;
}
diff --git a/caffe2/opt/onnxifi_transformer.h b/caffe2/opt/onnxifi_transformer.h
index b178909b22..a7ba90a0f9 100644
--- a/caffe2/opt/onnxifi_transformer.h
+++ b/caffe2/opt/onnxifi_transformer.h
@@ -79,7 +79,8 @@ class CAFFE2_API OnnxifiTransformer final {
const std::string& onnx_model_str,
const std::unordered_map<std::string, TensorShape>& output_size_hints,
const std::unordered_set<std::string>& initialization_list,
- const caffe2::NetDef& net);
+ const std::vector<std::string>& external_inputs,
+ const std::vector<std::string>& external_outputs);
CaffeMap<std::string, TensorShape> SsaRewriteAndMapNames(
Workspace* ws,