summaryrefslogtreecommitdiff
path: root/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp')
-rw-r--r--inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp345
1 files changed, 301 insertions, 44 deletions
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
index 9fe8a60b0..983fc2b35 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -10,24 +10,39 @@
#include <unordered_set>
#include <limits>
#include <fstream>
-#include <caseless.hpp>
+#include <unordered_map>
+#include "details/caseless.hpp"
#include "mkldnn_graph.h"
#include "mkldnn_graph_optimizer.h"
#include <debug.h>
#include <nodes/mkldnn_input_node.h>
#include <nodes/mkldnn_reorder_node.h>
+#include <nodes/mkldnn_depthwise_node.h>
+#include <nodes/mkldnn_conv_node.h>
+
#include "mkldnn_extension_utils.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn/omp_manager.h"
-#include <omp.h>
+#include "ie_parallel.hpp"
#include <graph_tools.hpp>
#include <cpp_interfaces/ie_executor_manager.hpp>
#include "ie_algorithm.hpp"
#include "memory_solver.hpp"
#include "mkldnn_infer_request.h"
#include "mkldnn_async_infer_request.h"
-// #define DEBUG_DUMP_PATH "/home/user/HDD/gna-mkldnn/"
+#include <blob_factory.hpp>
+#include <ie_util_internal.hpp>
+
+#include <data_stats.h>
+#include "../inference_engine/cnn_network_int8_normalizer.hpp"
+
+#define XBYAK_NO_OP_NAMES
+#define XBYAK_UNDEF_JNL
+#include "../../thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h"
+
+#include "cnn_network_stats_impl.hpp"
+// #define DEBUG_DUMP_PATH "/temp/path/dump/"
// #define DEBUG_DUMP_NEW_FOLDER_PER_INFER
#ifdef DEBUG_DUMP_PATH
#include "../../thirdparty/mkl-dnn/src/common/memory_desc_wrapper.hpp"
@@ -39,11 +54,12 @@ using namespace mkldnn;
using namespace MKLDNNPlugin;
using namespace MKLDNNPlugin::cpu;
using namespace InferenceEngine;
-using namespace InferenceEngine::MKLDNNPlugin;
+using namespace InferenceEngine::details;
void BindThreads(mkldnn::engine eng) {
static bool alreadyBind = false;
if (!alreadyBind) {
+#if IE_THREAD == IE_THREAD_OMP
int env_cores = 0;
if (getenv("OMP_NUM_THREADS") != nullptr) {
try {
@@ -57,9 +73,9 @@ void BindThreads(mkldnn::engine eng) {
OpenMpManager::bindOpenMpThreads(env_cores);
#else
int num_cores = env_cores == 0 ? OpenMpManager::getOpenMpThreadNumber() : env_cores;
- omp_set_num_threads(num_cores);
+ parallel_set_num_threads(num_cores);
+#endif
#endif
-
alreadyBind = true;
}
}
@@ -78,8 +94,13 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager
THROW_IE_EXCEPTION << "MKLDNNGraph::CreateGraph: No inputs for the topology";
}
- for (auto input : inputs) {
- MKLDNNNodePtr inputNode;
+ // The input layer precision has to be equal to the InputData precision
+ for (const auto& input : inputs) {
+ auto inputLayer = input.second->getInputData()->getCreatorLayer().lock();
+ if (inputLayer) inputLayer->precision = inputLayer->outData[0]->precision;
+ }
+
+ for (const auto& input : inputs) {
auto inputLayer = input.second->getInputData()->getCreatorLayer().lock();
if (!inputLayer) {
// For v1 parser
@@ -90,7 +111,7 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager
inputLayer->outData.push_back(input.second->getInputData());
}
- inputNode = MKLDNNNodePtr(MKLDNNNode::CreateNode(inputLayer, getEngine(), extMgr));
+ const MKLDNNNodePtr inputNode = MKLDNNNodePtr(MKLDNNNode::CreateNode(inputLayer, getEngine(), extMgr));
graphNodes.push_back(inputNode);
inputNodes[input.first] = inputNode;
@@ -116,7 +137,7 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager
}
auto allInputs = CNNNetGetAllInputLayers(network);
- for (auto input : allInputs) {
+ for (const auto& input : allInputs) {
auto isRealInput = std::find_if(std::begin(inputs), std::end(inputs), [&](InputsDataMap::value_type& inputInfo){
return inputInfo.second->getInputData()->getName() == input->name;
});
@@ -157,27 +178,84 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager
std::map<std::string, DataPtr> output;
network.getOutputsInfo(output);
- for (auto it = output.begin(); it != output.end(); it++) {
- MKLDNNNodePtr node = FindNodeWithName((*it).second->getCreatorLayer().lock()->name);
+ for (auto it = output.begin(); it != output.end(); ++it) {
+ const DataPtr& outputDataPtr = it->second;
+
+ MKLDNNNodePtr node = FindNodeWithName(outputDataPtr->getCreatorLayer().lock()->name);
if (!node)
- THROW_IE_EXCEPTION << "Cannot find output layer " << (*it).second->getCreatorLayer().lock()->name;
+ THROW_IE_EXCEPTION << "Cannot find output layer " << outputDataPtr->getCreatorLayer().lock()->name;
- std::string name = "out_" + (*it).first;
+ const std::string name = "out_" + it->first;
- CNNLayerPtr layer(new CNNLayer({name,
- "Output",
- (*it).second->getCreatorLayer().lock()->outData[0]->getPrecision()}));
- layer->insData.push_back((*it).second);
+ CNNLayerPtr layer(new CNNLayer({name, "Output", outputDataPtr->getCreatorLayer().lock()->outData[0]->getPrecision()}));
+ layer->insData.push_back(outputDataPtr);
MKLDNNNodePtr outputLayer(new MKLDNNInputNode(layer, getEngine()));
MKLDNNEdgePtr edgePtr(new MKLDNNEdge(node, outputLayer));
graphEdges.push_back(edgePtr);
- outputLayer->addEdge(edgePtr, 0, node->getChildEdges().size());
+
+ const std::vector<MKLDNNEdgeWeakPtr>& childEdges = node->getChildEdges();
+ size_t insertBeforeChildEdgeIndex = childEdges.size();
+ if (!childEdges.empty()) {
+ bool outputDataIndexWasFound = false;
+ size_t outputDataIndex = 0;
+ for (size_t i = 0; i < node->getCnnLayer()->outData.size(); ++i) {
+ const DataPtr& otherOutputDataPtr = node->getCnnLayer()->outData[i];
+ if (otherOutputDataPtr->name == it->first) {
+ outputDataIndexWasFound = true;
+ outputDataIndex = i;
+ }
+ }
+ IE_ASSERT(outputDataIndexWasFound) << "Node " << node->getName() << " doesn't have output data '" << it->first << "'";
+
+ std::unordered_map<Data*, size_t> nodeOutputDataIndexByData;
+ const CNNLayerPtr& nodeLayer = node->getCnnLayer();
+ for (size_t dataIndex = 0; dataIndex < nodeLayer->outData.size(); ++dataIndex) {
+ nodeOutputDataIndexByData.emplace(nodeLayer->outData[dataIndex].get(), dataIndex);
+ }
+
+ auto getOutputDataIndex = [&](const MKLDNNEdgePtr& childEdge) -> size_t {
+ const InferenceEngine::CNNLayerPtr& childNodeLayer = childEdge->getChild()->getCnnLayer();
+ for (const DataWeakPtr& childNodeInsertWeakData : childNodeLayer->insData) {
+ const DataPtr childNodeInsertData = childNodeInsertWeakData.lock();
+ if (!childNodeInsertData) {
+ continue;
+ }
+
+ const auto indexIt = nodeOutputDataIndexByData.find(childNodeInsertData.get());
+ if (indexIt != nodeOutputDataIndexByData.end()) {
+ return indexIt->second;
+ }
+ }
+
+ IE_ASSERT(false) << "Node has child edge without insert data";
+ };
+
+ for (size_t childEdgeIndex = 0; childEdgeIndex < childEdges.size(); ++childEdgeIndex) {
+ const MKLDNNEdgePtr childEdge = childEdges[childEdgeIndex].lock();
+ if (!childEdge) {
+ continue;
+ }
+
+ const size_t edgeOutputDataIndex = getOutputDataIndex(childEdge);
+ if (outputDataIndex < edgeOutputDataIndex) {
+ insertBeforeChildEdgeIndex = childEdgeIndex;
+ break;
+ }
+ }
+ }
+
+ if (insertBeforeChildEdgeIndex < childEdges.size()) {
+ outputLayer->addEdge(edgePtr, 0, insertBeforeChildEdgeIndex, true);
+ } else {
+ outputLayer->addEdge(edgePtr, 0, node->getChildEdges().size());
+ }
+
graphNodes.push_back(outputLayer);
outputNodes.push_back(outputLayer);
}
MKLDNNGraphOptimizer optimizer;
- optimizer.Optimize(*this);
+ optimizer.ApplyCommonGraphOptimizations(*this);
SortTopologically();
InitNodes();
@@ -187,6 +265,8 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager
}
InitEdges();
+ optimizer.ApplyImplSpecificGraphOptimizations(*this);
+
SortTopologically();
Allocate();
@@ -197,6 +277,31 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager
graphNode->cleanup();
}
+ for (auto &graphNode : graphNodes) {
+#ifndef NDEBUG
+ std::cout << "name: " << graphNode->getName() << " [ ";
+#endif
+ if (graphNode->parentEdges.size() > 0) {
+ auto prnt = graphNode->parentEdges[0].lock();
+#ifndef NDEBUG
+ std::cout << "in: " << prnt->getOutputDesc().getPrecision().name() << "/l="
+ << prnt->getOutputDesc().getLayout()
+ << "; ";
+#endif
+ }
+ if (graphNode->childEdges.size() > 0) {
+ auto chld = graphNode->childEdges[0].lock();
+#ifndef NDEBUG
+ std::cout << "out: " << chld->getInputDesc().getPrecision().name() << "/l="
+ << chld->getInputDesc().getLayout();
+#endif
+ }
+#ifndef NDEBUG
+ std::cout << " ]" << std::endl;
+#endif
+ }
+
+
mkldnn::stream stream = mkldnn::stream(stream::kind::eager);
for (auto &graphNode : graphNodes) {
if (!graphNode->isConstant())
@@ -210,7 +315,9 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager
void MKLDNNGraph::ParseNode(const CNNLayerPtr& cnnLayer, MKLDNNNodePtr& parent,
const MKLDNNExtensionManager::Ptr& extMgr, size_t outIdx,
std::vector<ParsedLayer>& queuelayers) {
- if (cnnLayer->precision != Precision::FP32) {
+ if (cnnLayer->precision != Precision::FP32 &&
+ cnnLayer->precision != Precision::I8 &&
+ cnnLayer->precision != Precision::U8) {
THROW_IE_EXCEPTION << "The plugin does not support " << cnnLayer->precision;
}
@@ -383,22 +490,20 @@ void MKLDNNGraph::InitEdges() {
MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, graphEdges[i]->getChild()));
afterNode->setDims(graphEdges[i]->getDims());
- int oIndex = graphEdges[i]->getOutputNum();
- int iIndex = graphEdges[i]->getInputNum();
- if (iIndex < 0 || oIndex < 0)
+ auto oIndexes = graphEdges[i]->getAllOutputNums();
+ auto iIndexes = graphEdges[i]->getAllInputNums();
+ if (iIndexes[0] < 0 || oIndexes[0] < 0)
THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
<< graphEdges[i]->getParent()->getName() << " and "
<< graphEdges[i]->getChild()->getName() << ".";
// Add edge for beforeNode
- graphEdges[i]->getParent()->childEdges[iIndex].reset();
- graphEdges[i]->getParent()->childEdges[iIndex] = beforeNode;
beforeNode->getChild()->parentEdges.push_back(beforeNode);
+ for (auto iIndex : iIndexes) graphEdges[i]->getParent()->childEdges[iIndex] = beforeNode;
// Add edge for afterNode
afterNode->getParent()->childEdges.push_back(afterNode);
- graphEdges[i]->getChild()->parentEdges[oIndex].reset();
- graphEdges[i]->getChild()->parentEdges[oIndex] = afterNode;
+ for (auto oIndex : oIndexes) graphEdges[i]->getChild()->parentEdges[oIndex] = afterNode;
newReorder->getSupportedDescriptors();
newReorder->initSupportedPrimitiveDescriptors();
@@ -532,7 +637,7 @@ void MKLDNNGraph::AllocateWithReuse() {
size_t total_size = memSolver.solve() * alignment;
memWorkspace.reset(new MKLDNNMemory(eng));
- memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::FP32, {1, total_size}, Layout::NC)));
+ memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::FP32, {total_size}, Layout::C)));
float* workspace_ptr = static_cast<float*>(memWorkspace->GetData());
for (int i = 0; i < edge_clasters.size(); i++) {
@@ -582,9 +687,15 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
const void *ext_data_ptr = in->cbuffer();
void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData();
- if (ext_data_ptr != inter_data_ptr)
- input->second->getChildEdgeAt(0)->getMemory().SetData(MKLDNNExtensionUtils::IEPrecisionToDataType(in->getTensorDesc().getPrecision()),
- MKLDNNMemory::Convert(in->getTensorDesc().getLayout()), ext_data_ptr, in->byteSize(), false);
+ if (ext_data_ptr != inter_data_ptr) {
+ auto l = in->getTensorDesc().getLayout();
+ if (l == CHW && input->second->getChildEdgeAt(0)->getDims().ndims() == 4)
+ l = NCHW;
+
+ input->second->getChildEdgeAt(0)->getMemory().SetData(
+ MKLDNNExtensionUtils::IEPrecisionToDataType(in->getTensorDesc().getPrecision()),
+ MKLDNNMemory::Convert(l), ext_data_ptr, in->byteSize(), false);
+ }
// todo: make sure 'name' exists in this map...
if (_meanImages.find(name) != _meanImages.end()) {
@@ -832,9 +943,28 @@ void MKLDNNGraph::Infer(int batch) {
for (size_t d = 0; d < childEdge->getDims().ndims(); d++)
layer_data_dump << childEdge->getDims()[d] << " ";
layer_data_dump << "(" << dst_d.nelems() << ")" << std::endl;
- for (size_t i = 0; i < dst_d.nelems(); i++) {
- layer_data_dump << std::fixed << std::setprecision(3) << data[dst_d.off_l(i)] << std::endl;
+ if (childEdge->getBlob()->getTensorDesc().getPrecision() == Precision::FP32) {
+ float *data = childEdge->getBlob()->buffer();
+ for (size_t bs = 0; bs < dst_d.nelems(); bs++) {
+ layer_data_dump << std::fixed << std::setprecision(3) << data[dst_d.off_l(bs)] << std::endl;
+ }
+ } else if (childEdge->getBlob()->getTensorDesc().getPrecision() == Precision::I8) {
+ int8_t *data = childEdge->getBlob()->buffer();
+ for (size_t bs = 0; bs < dst_d.nelems(); bs++) {
+ layer_data_dump << static_cast<int>(data[dst_d.off_l(bs)]) << std::endl;
+ }
+ } else if (childEdge->getBlob()->getTensorDesc().getPrecision() == Precision::U8) {
+ uint8_t *data = childEdge->getBlob()->buffer();
+ for (size_t bs = 0; bs < dst_d.nelems(); bs++) {
+ layer_data_dump << static_cast<int>(data[dst_d.off_l(bs)]) << std::endl;
+ }
+ } else if (childEdge->getBlob()->getTensorDesc().getPrecision() == Precision::I32) {
+ int32_t *data = childEdge->getBlob()->buffer();
+ for (size_t bs = 0; bs < dst_d.nelems(); bs++) {
+ layer_data_dump << static_cast<int>(data[dst_d.off_l(bs)]) << std::endl;
+ }
}
+
layer_data_dump.close();
} else {
std::cout << "Cannot create file " << fname << std::endl;
@@ -851,8 +981,7 @@ void MKLDNNGraph::Infer(int batch) {
std::string fname = tname + ".txt";
layer_data_dump.open(fname);
if (layer_data_dump.is_open()) {
- float *data = static_cast<float *>(graphNodes[i]->getParentEdges()[p]
- .lock()->getMemory().GetData());
+ size_t dataSize = graphNodes[i]->getParentEdges()[p].lock()->getMemory().GetSize();
mkldnn::impl::memory_desc_wrapper src_d(graphNodes[i]->getParentEdges()[p]
.lock()->getMemory().GetDescriptor().data);
#ifdef DEBUG_BMP_OUTPUT
@@ -862,9 +991,31 @@ void MKLDNNGraph::Infer(int batch) {
for (size_t d = 0; d < parentEdge->getDims().ndims(); d++)
layer_data_dump << parentEdge->getDims()[d] << " ";
layer_data_dump << "(" << src_d.nelems() << ")"<< std::endl;
- for (size_t i = 0; i < src_d.nelems(); i++) {
- layer_data_dump << std::fixed << std::setprecision(3) << data[src_d.off_l(i)] << std::endl;
+ auto precision = graphNodes[i]->getParentEdges()[p].lock()->outputDesc.getPrecision();
+ if (precision == Precision::FP32) {
+ float *data = static_cast<float *>(graphNodes[i]->getParentEdges()[p].lock()->getMemory().GetData());
+ for (size_t bs = 0; bs < dataSize; bs++) {
+ layer_data_dump << std::fixed << std::setprecision(3) << data[src_d.off_l(bs)] << std::endl;
+ }
+ } else if (precision == Precision::I8) {
+ int8_t *data = static_cast<int8_t *>(graphNodes[i]->getParentEdges()[p].lock()->getMemory().GetData());
+ for (size_t bs = 0; bs < dataSize; bs++) {
+ layer_data_dump << static_cast<int>(data[src_d.off_l(bs)]) << std::endl;
+ }
+ } else if (graphNodes[i]->getParentEdges()[p].lock()->outputDesc.getPrecision() == Precision::U8) {
+ uint8_t *data = static_cast<uint8_t *>(graphNodes[i]->getParentEdges()[p].lock()->getMemory().GetData());
+ for (size_t bs = 0; bs < dataSize; bs++) {
+ layer_data_dump << static_cast<int>(data[src_d.off_l(bs)]) << std::endl;
+ }
+ } else if (graphNodes[i]->getParentEdges()[p].lock()->outputDesc.getPrecision() == Precision::I32) {
+ int32_t *data = static_cast<int32_t *>(graphNodes[i]->getParentEdges()[p].lock()->getMemory().GetData());
+ for (size_t bs = 0; bs < dataSize; bs++) {
+ layer_data_dump << static_cast<int>(data[src_d.off_l(bs)]) << std::endl;
+ }
+ } else {
+ layer_data_dump << "Unsupported precision: " << precision.name() << std::endl;
}
+
layer_data_dump.close();
} else {
std::cout << "Cannot create file " << fname << std::endl;
@@ -874,16 +1025,35 @@ void MKLDNNGraph::Infer(int batch) {
GenericLayer* genericLayer = dynamic_cast<GenericLayer*>(graphNodes[i]->getCnnLayer().get());
if (genericLayer != nullptr) {
for (auto blob : genericLayer->blobs) {
- layer_data_dump.open(folderName + nodeName + "_" + blob.first + ".txt");
+ layer_data_dump.open(folderName + nodeName + "_blob-" + blob.first + ".txt");
if (layer_data_dump.is_open()) {
layer_data_dump << "shape: ";
for (size_t d = 0; d < blob.second->dims().size(); d++)
layer_data_dump << blob.second->dims()[d] << " ";
layer_data_dump << "(" << blob.second->size() << ")"<< std::endl;
+ if (blob.second->getTensorDesc().getPrecision() == Precision::FP32) {
float *data = blob.second->buffer();
for (size_t bs = 0; bs < blob.second->size(); bs++) {
layer_data_dump << std::fixed << std::setprecision(3) << data[bs] << std::endl;
}
+ } else if (blob.second->getTensorDesc().getPrecision() == Precision::I8) {
+ int8_t *data = blob.second->buffer();
+ for (size_t bs = 0; bs < blob.second->size(); bs++) {
+ layer_data_dump << static_cast<int>(data[bs]) << std::endl;
+ }
+ } else if (blob.second->getTensorDesc().getPrecision() == Precision::U8) {
+ uint8_t *data = blob.second->buffer();
+ for (size_t bs = 0; bs < blob.second->size(); bs++) {
+ layer_data_dump << static_cast<int>(data[bs]) << std::endl;
+ }
+ } else if (blob.second->getTensorDesc().getPrecision() == Precision::I32) {
+ int32_t *data = blob.second->buffer();
+ for (size_t bs = 0; bs < blob.second->size(); bs++) {
+ layer_data_dump << static_cast<int>(data[bs]) << std::endl;
+ }
+ } else {
+ layer_data_dump << "Unsupported precision: " << blob.second->getTensorDesc().getPrecision().name() << std::endl;
+ }
layer_data_dump.close();
} else {
std::cout << "Cannot create file " << folderName << nodeName
@@ -901,14 +1071,13 @@ MKLDNNNodePtr MKLDNNGraph::FindNodeWithName(const std::string& name) const {
return std::shared_ptr<MKLDNNNode>();
}
- auto childs = graphNodes;
-
- auto node = std::find_if(childs.begin(), childs.end(),
+ const auto children = graphNodes;
+ const auto node = std::find_if(children.begin(), children.end(),
[&name](MKLDNNNodePtr const& item) {
return item->getName() == name;
});
- return (node == childs.end() ? std::shared_ptr<MKLDNNNode>() : *node);
+ return (node == children.end() ? std::shared_ptr<MKLDNNNode>() : *node);
}
void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sortedNodes) {
@@ -1014,6 +1183,80 @@ void MKLDNNGraph::getOutputBlobs(InferenceEngine::BlobMap &resp) {
}
}
+void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) {  // Bypasses `node`: each live (parent edge, child edge) pair is replaced by a direct parent->child edge. `node` is not erased from the node list in this function.
+ auto removeEdge = [](MKLDNNGraph &graph, MKLDNNEdgePtr& edge) {  // Helper: erases the first entry of graph.GetEdges() that compares equal to `edge`; no-op when absent.
+ auto& edges = graph.GetEdges();
+ for (auto it = edges.begin(); it != edges.end(); it++) {
+ if ((*it) == edge) {
+ edges.erase(it);
+ return;  // leave immediately: `it` is not reused after the erase
+ }
+ }
+ };
+ for (size_t i = 0; i < node->parentEdges.size(); i++) {  // outer loop: one pass per parent-edge slot of `node`
+ if (!node->parentEdges[i].lock())  // slot holds an expired weak reference: nothing to rewire
+ continue;
+ auto parent = node->parentEdges[i].lock()->getParent();
+ if (!parent)  // edge has no parent node: skip it
+ continue;
+
+ for (size_t j = 0; j < node->childEdges.size(); j++) {  // inner loop: pair the current parent with every child-edge slot
+ if (!node->childEdges[j].lock())  // expired child-edge slot
+ continue;
+ auto child = node->childEdges[j].lock()->getChild();
+ if (!child)  // edge has no child node: skip it
+ continue;
+
+ MKLDNNEdgePtr remEdge = node->parentEdges[i].lock();  // re-locked on every inner iteration; NOTE(review): may be null after a previous iteration removed it - confirm against removeEdge
+ int inNum = 0;  // stays 0 when the parent edge is no longer available
+ if (remEdge) {
+ inNum = remEdge->getInputNum();  // read the index before the edge is detached
+ node->removeEdge(remEdge);  // detach from the node; NOTE(review): presumably mutates parentEdges/childEdges while they are being indexed - TODO confirm
+ removeEdge(*this, remEdge);  // and drop it from the graph-wide edge list
+ }
+ inNum += j;  // offset by the child slot index; presumably keeps fan-out edges in distinct slots on the parent - TODO confirm
+ remEdge = node->childEdges[j].lock();  // same variable reused for the child-side edge
+ int outNum = 0;  // stays 0 when the child edge is no longer available
+ if (remEdge) {
+ outNum = remEdge->getOutputNum();  // read the index before the edge is detached
+ node->removeEdge(remEdge);
+ removeEdge(*this, remEdge);
+ }
+ MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child));  // direct connection that replaces parent->node->child
+ this->GetEdges().push_back(newEdge);  // register the new edge in the graph-wide edge list
+ parent->addEdge(newEdge, outNum, inNum);  // NOTE(review): assumes addEdge takes (edge, index on child side, index on parent side) - verify against MKLDNNNode::addEdge
+ }
+ }
+}
+
+// Erases every node that reports isDropped() from the container returned by
+// GetNodes(). Nodes that are kept retain their relative order.
+void MKLDNNGraph::RemoveDroppedNodes() {
+    auto& allNodes = this->GetNodes();
+
+    // The cursor is advanced manually: erase() already yields the iterator
+    // that follows the removed element, so the loop header has no increment.
+    for (auto cursor = allNodes.begin(); cursor != allNodes.end();) {
+        if (!(*cursor)->isDropped()) {
+            cursor++;
+            continue;
+        }
+        cursor = allNodes.erase(cursor);
+    }
+}
+
+// Erases every edge that reports isDropped() from the container returned by
+// GetEdges(). Edges that are kept retain their relative order.
+void MKLDNNGraph::RemoveDroppedEdges() {
+    auto& allEdges = this->GetEdges();
+
+    // The cursor is advanced manually: erase() already yields the iterator
+    // that follows the removed element, so the loop header has no increment.
+    for (auto cursor = allEdges.begin(); cursor != allEdges.end();) {
+        if (!(*cursor)->isDropped()) {
+            cursor++;
+            continue;
+        }
+        cursor = allEdges.erase(cursor);
+    }
+}
+
bool MKLDNNExecNetwork::CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const {
InputsDataMap inputs;
network.getInputsInfo(inputs);
@@ -1081,7 +1324,21 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(InferenceEngine::ICNNNetwork &network,
// initialization in taskExecutor thread
auto task = std::make_shared<InferenceEngine::Task>([&]() {
- graph->CreateGraph(network, extensionManager);
+ // we are cloning network if we have statistics and we can transform network
+ // in other case we pass original network. Especially because LSTM networks
+ // are not cloned properly
+ ICNNNetworkStats* pstats = nullptr;
+ StatusCode s = network.getStats(&pstats, nullptr);
+ Xbyak::util::Cpu cpu;
+ // Enable int8 only for avx512
+ if (s == StatusCode::OK && pstats && !pstats->isEmpty() && cpu.has(Xbyak::util::Cpu::tAVX512F)) {
+ details::CNNNetworkImplPtr clonnedNetwork = cloneNet(network);
+ CNNNetworkInt8Normalizer cnnorm;
+ cnnorm.NormalizeNetwork(*clonnedNetwork, *pstats);
+ graph->CreateGraph(*clonnedNetwork, extensionManager);
+ } else {
+ graph->CreateGraph(network, extensionManager);
+ }
});
_taskExecutor->startTask(task);