diff options
Diffstat (limited to 'inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp')
-rw-r--r-- | inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp | 345 |
1 files changed, 301 insertions, 44 deletions
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 9fe8a60b0..983fc2b35 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -10,24 +10,39 @@ #include <unordered_set> #include <limits> #include <fstream> -#include <caseless.hpp> +#include <unordered_map> +#include "details/caseless.hpp" #include "mkldnn_graph.h" #include "mkldnn_graph_optimizer.h" #include <debug.h> #include <nodes/mkldnn_input_node.h> #include <nodes/mkldnn_reorder_node.h> +#include <nodes/mkldnn_depthwise_node.h> +#include <nodes/mkldnn_conv_node.h> + #include "mkldnn_extension_utils.h" #include "mkldnn_extension_mngr.h" #include "mkldnn/omp_manager.h" -#include <omp.h> +#include "ie_parallel.hpp" #include <graph_tools.hpp> #include <cpp_interfaces/ie_executor_manager.hpp> #include "ie_algorithm.hpp" #include "memory_solver.hpp" #include "mkldnn_infer_request.h" #include "mkldnn_async_infer_request.h" -// #define DEBUG_DUMP_PATH "/home/user/HDD/gna-mkldnn/" +#include <blob_factory.hpp> +#include <ie_util_internal.hpp> + +#include <data_stats.h> +#include "../inference_engine/cnn_network_int8_normalizer.hpp" + +#define XBYAK_NO_OP_NAMES +#define XBYAK_UNDEF_JNL +#include "../../thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h" + +#include "cnn_network_stats_impl.hpp" +// #define DEBUG_DUMP_PATH "/temp/path/dump/" // #define DEBUG_DUMP_NEW_FOLDER_PER_INFER #ifdef DEBUG_DUMP_PATH #include "../../thirdparty/mkl-dnn/src/common/memory_desc_wrapper.hpp" @@ -39,11 +54,12 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace MKLDNNPlugin::cpu; using namespace InferenceEngine; -using namespace InferenceEngine::MKLDNNPlugin; +using namespace InferenceEngine::details; void BindThreads(mkldnn::engine eng) { static bool alreadyBind = false; if (!alreadyBind) { +#if IE_THREAD == IE_THREAD_OMP int env_cores = 0; if (getenv("OMP_NUM_THREADS") != nullptr) { try { @@ -57,9 +73,9 @@ void BindThreads(mkldnn::engine eng) { OpenMpManager::bindOpenMpThreads(env_cores); #else int num_cores = env_cores == 0 ? OpenMpManager::getOpenMpThreadNumber() : env_cores; - omp_set_num_threads(num_cores); + parallel_set_num_threads(num_cores); +#endif #endif - alreadyBind = true; } } @@ -78,8 +94,13 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager THROW_IE_EXCEPTION << "MKLDNNGraph::CreateGraph: No inputs for the topology"; } - for (auto input : inputs) { - MKLDNNNodePtr inputNode; + // The input layer precision has to be equal to the InputData precision + for (const auto& input : inputs) { + auto inputLayer = input.second->getInputData()->getCreatorLayer().lock(); + if (inputLayer) inputLayer->precision = inputLayer->outData[0]->precision; + } + + for (const auto& input : inputs) { auto inputLayer = input.second->getInputData()->getCreatorLayer().lock(); if (!inputLayer) { // For v1 parser @@ -90,7 +111,7 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager inputLayer->outData.push_back(input.second->getInputData()); } - inputNode = MKLDNNNodePtr(MKLDNNNode::CreateNode(inputLayer, getEngine(), extMgr)); + const MKLDNNNodePtr inputNode = MKLDNNNodePtr(MKLDNNNode::CreateNode(inputLayer, getEngine(), extMgr)); graphNodes.push_back(inputNode); inputNodes[input.first] = inputNode; @@ -116,7 +137,7 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager } auto allInputs = CNNNetGetAllInputLayers(network); - for (auto input : allInputs) { + for (const auto& input : allInputs) { auto isRealInput = std::find_if(std::begin(inputs), std::end(inputs), [&](InputsDataMap::value_type& inputInfo){ return inputInfo.second->getInputData()->getName() == input->name; }); @@ -157,27 +178,84 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager std::map<std::string, DataPtr> output; network.getOutputsInfo(output); - for (auto it = output.begin(); it != output.end(); it++) { - MKLDNNNodePtr node = FindNodeWithName((*it).second->getCreatorLayer().lock()->name); + for (auto it = output.begin(); it != output.end(); ++it) { + const DataPtr& outputDataPtr = it->second; + + MKLDNNNodePtr node = FindNodeWithName(outputDataPtr->getCreatorLayer().lock()->name); if (!node) - THROW_IE_EXCEPTION << "Cannot find output layer " << (*it).second->getCreatorLayer().lock()->name; + THROW_IE_EXCEPTION << "Cannot find output layer " << outputDataPtr->getCreatorLayer().lock()->name; - std::string name = "out_" + (*it).first; + const std::string name = "out_" + it->first; - CNNLayerPtr layer(new CNNLayer({name, - "Output", - (*it).second->getCreatorLayer().lock()->outData[0]->getPrecision()})); - layer->insData.push_back((*it).second); + CNNLayerPtr layer(new CNNLayer({name, "Output", outputDataPtr->getCreatorLayer().lock()->outData[0]->getPrecision()})); + layer->insData.push_back(outputDataPtr); MKLDNNNodePtr outputLayer(new MKLDNNInputNode(layer, getEngine())); MKLDNNEdgePtr edgePtr(new MKLDNNEdge(node, outputLayer)); graphEdges.push_back(edgePtr); - outputLayer->addEdge(edgePtr, 0, node->getChildEdges().size()); + + const std::vector<MKLDNNEdgeWeakPtr>& childEdges = node->getChildEdges(); + size_t insertBeforeChildEdgeIndex = childEdges.size(); + if (!childEdges.empty()) { + bool outputDataIndexWasFound = false; + size_t outputDataIndex = 0; + for (size_t i = 0; i < node->getCnnLayer()->outData.size(); ++i) { + const DataPtr& otherOutputDataPtr = node->getCnnLayer()->outData[i]; + if (otherOutputDataPtr->name == it->first) { + outputDataIndexWasFound = true; + outputDataIndex = i; + } + } + IE_ASSERT(outputDataIndexWasFound) << "Node " << node->getName() << " doesn't have output data '" << it->first << "'"; + + std::unordered_map<Data*, size_t> nodeOutputDataIndexByData; + const CNNLayerPtr& nodeLayer = node->getCnnLayer(); + for (size_t dataIndex = 0; dataIndex < nodeLayer->outData.size(); ++dataIndex) { + nodeOutputDataIndexByData.emplace(nodeLayer->outData[dataIndex].get(), dataIndex); + } + + auto getOutputDataIndex = [&](const MKLDNNEdgePtr& childEdge) -> size_t { + const InferenceEngine::CNNLayerPtr& childNodeLayer = childEdge->getChild()->getCnnLayer(); + for (const DataWeakPtr& childNodeInsertWeakData : childNodeLayer->insData) { + const DataPtr childNodeInsertData = childNodeInsertWeakData.lock(); + if (!childNodeInsertData) { + continue; + } + + const auto indexIt = nodeOutputDataIndexByData.find(childNodeInsertData.get()); + if (indexIt != nodeOutputDataIndexByData.end()) { + return indexIt->second; + } + } + + IE_ASSERT(false) << "Node has child edge without insert data"; + }; + + for (size_t childEdgeIndex = 0; childEdgeIndex < childEdges.size(); ++childEdgeIndex) { + const MKLDNNEdgePtr childEdge = childEdges[childEdgeIndex].lock(); + if (!childEdge) { + continue; + } + + const size_t edgeOutputDataIndex = getOutputDataIndex(childEdge); + if (outputDataIndex < edgeOutputDataIndex) { + insertBeforeChildEdgeIndex = childEdgeIndex; + break; + } + } + } + + if (insertBeforeChildEdgeIndex < childEdges.size()) { + outputLayer->addEdge(edgePtr, 0, insertBeforeChildEdgeIndex, true); + } else { + outputLayer->addEdge(edgePtr, 0, node->getChildEdges().size()); + } + graphNodes.push_back(outputLayer); outputNodes.push_back(outputLayer); } MKLDNNGraphOptimizer optimizer; - optimizer.Optimize(*this); + optimizer.ApplyCommonGraphOptimizations(*this); SortTopologically(); InitNodes(); @@ -187,6 +265,8 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager } InitEdges(); + optimizer.ApplyImplSpecificGraphOptimizations(*this); + SortTopologically(); Allocate(); @@ -197,6 +277,31 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager graphNode->cleanup(); } + for (auto &graphNode : graphNodes) { +#ifndef NDEBUG + std::cout << "name: " << graphNode->getName() << " [ "; +#endif + if (graphNode->parentEdges.size() > 0) { + auto prnt = graphNode->parentEdges[0].lock(); +#ifndef NDEBUG + std::cout << "in: " << prnt->getOutputDesc().getPrecision().name() << "/l=" + << prnt->getOutputDesc().getLayout() + << "; "; +#endif + } + if (graphNode->childEdges.size() > 0) { + auto chld = graphNode->childEdges[0].lock(); +#ifndef NDEBUG + std::cout << "out: " << chld->getInputDesc().getPrecision().name() << "/l=" + << chld->getInputDesc().getLayout(); +#endif + } +#ifndef NDEBUG + std::cout << " ]" << std::endl; +#endif + } + + mkldnn::stream stream = mkldnn::stream(stream::kind::eager); for (auto &graphNode : graphNodes) { if (!graphNode->isConstant()) @@ -210,7 +315,9 @@ void MKLDNNGraph::CreateGraph(ICNNNetwork &network, const MKLDNNExtensionManager void MKLDNNGraph::ParseNode(const CNNLayerPtr& cnnLayer, MKLDNNNodePtr& parent, const MKLDNNExtensionManager::Ptr& extMgr, size_t outIdx, std::vector<ParsedLayer>& queuelayers) { - if (cnnLayer->precision != Precision::FP32) { + if (cnnLayer->precision != Precision::FP32 && + cnnLayer->precision != Precision::I8 && + cnnLayer->precision != Precision::U8) { THROW_IE_EXCEPTION << "The plugin does not support " << cnnLayer->precision; } @@ -383,22 +490,20 @@ void MKLDNNGraph::InitEdges() { MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, graphEdges[i]->getChild())); afterNode->setDims(graphEdges[i]->getDims()); - int oIndex = graphEdges[i]->getOutputNum(); - int iIndex = graphEdges[i]->getInputNum(); - if (iIndex < 0 || oIndex < 0) + auto oIndexes = graphEdges[i]->getAllOutputNums(); + auto iIndexes = graphEdges[i]->getAllInputNums(); + if (iIndexes[0] < 0 || oIndexes[0] < 0) THROW_IE_EXCEPTION << "Cannot create reorder for nodes: " << graphEdges[i]->getParent()->getName() << " and " << graphEdges[i]->getChild()->getName() << "."; // Add edge for beforeNode - graphEdges[i]->getParent()->childEdges[iIndex].reset(); - graphEdges[i]->getParent()->childEdges[iIndex] = beforeNode; beforeNode->getChild()->parentEdges.push_back(beforeNode); + for (auto iIndex : iIndexes) graphEdges[i]->getParent()->childEdges[iIndex] = beforeNode; // Add edge for afterNode afterNode->getParent()->childEdges.push_back(afterNode); - graphEdges[i]->getChild()->parentEdges[oIndex].reset(); - graphEdges[i]->getChild()->parentEdges[oIndex] = afterNode; + for (auto oIndex : oIndexes) graphEdges[i]->getChild()->parentEdges[oIndex] = afterNode; newReorder->getSupportedDescriptors(); newReorder->initSupportedPrimitiveDescriptors(); @@ -532,7 +637,7 @@ void MKLDNNGraph::AllocateWithReuse() { size_t total_size = memSolver.solve() * alignment; memWorkspace.reset(new MKLDNNMemory(eng)); - memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::FP32, {1, total_size}, Layout::NC))); + memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::FP32, {total_size}, Layout::C))); float* workspace_ptr = static_cast<float*>(memWorkspace->GetData()); for (int i = 0; i < edge_clasters.size(); i++) { @@ -582,9 +687,15 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: const void *ext_data_ptr = in->cbuffer(); void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData(); - if (ext_data_ptr != inter_data_ptr) - input->second->getChildEdgeAt(0)->getMemory().SetData(MKLDNNExtensionUtils::IEPrecisionToDataType(in->getTensorDesc().getPrecision()), - MKLDNNMemory::Convert(in->getTensorDesc().getLayout()), ext_data_ptr, in->byteSize(), false); + if (ext_data_ptr != inter_data_ptr) { + auto l = in->getTensorDesc().getLayout(); + if (l == CHW && input->second->getChildEdgeAt(0)->getDims().ndims() == 4) + l = NCHW; + + input->second->getChildEdgeAt(0)->getMemory().SetData( + MKLDNNExtensionUtils::IEPrecisionToDataType(in->getTensorDesc().getPrecision()), + MKLDNNMemory::Convert(l), ext_data_ptr, in->byteSize(), false); + } // todo: make sure 'name' exists in this map... if (_meanImages.find(name) != _meanImages.end()) { @@ -832,9 +943,28 @@ void MKLDNNGraph::Infer(int batch) { for (size_t d = 0; d < childEdge->getDims().ndims(); d++) layer_data_dump << childEdge->getDims()[d] << " "; layer_data_dump << "(" << dst_d.nelems() << ")" << std::endl; - for (size_t i = 0; i < dst_d.nelems(); i++) { - layer_data_dump << std::fixed << std::setprecision(3) << data[dst_d.off_l(i)] << std::endl; + if (childEdge->getBlob()->getTensorDesc().getPrecision() == Precision::FP32) { + float *data = childEdge->getBlob()->buffer(); + for (size_t bs = 0; bs < dst_d.nelems(); bs++) { + layer_data_dump << std::fixed << std::setprecision(3) << data[dst_d.off_l(bs)] << std::endl; + } + } else if (childEdge->getBlob()->getTensorDesc().getPrecision() == Precision::I8) { + int8_t *data = childEdge->getBlob()->buffer(); + for (size_t bs = 0; bs < dst_d.nelems(); bs++) { + layer_data_dump << static_cast<int>(data[dst_d.off_l(bs)]) << std::endl; + } + } else if (childEdge->getBlob()->getTensorDesc().getPrecision() == Precision::U8) { + uint8_t *data = childEdge->getBlob()->buffer(); + for (size_t bs = 0; bs < dst_d.nelems(); bs++) { + layer_data_dump << static_cast<int>(data[dst_d.off_l(bs)]) << std::endl; + } + } else if (childEdge->getBlob()->getTensorDesc().getPrecision() == Precision::I32) { + int32_t *data = childEdge->getBlob()->buffer(); + for (size_t bs = 0; bs < dst_d.nelems(); bs++) { + layer_data_dump << static_cast<int>(data[dst_d.off_l(bs)]) << std::endl; + } } + layer_data_dump.close(); } else { std::cout << "Cannot create file " << fname << std::endl; @@ -851,8 +981,7 @@ void MKLDNNGraph::Infer(int batch) { std::string fname = tname + ".txt"; layer_data_dump.open(fname); if (layer_data_dump.is_open()) { - float *data = static_cast<float *>(graphNodes[i]->getParentEdges()[p] - .lock()->getMemory().GetData()); + size_t dataSize = graphNodes[i]->getParentEdges()[p].lock()->getMemory().GetSize(); mkldnn::impl::memory_desc_wrapper src_d(graphNodes[i]->getParentEdges()[p] .lock()->getMemory().GetDescriptor().data); #ifdef DEBUG_BMP_OUTPUT @@ -862,9 +991,31 @@ void MKLDNNGraph::Infer(int batch) { for (size_t d = 0; d < parentEdge->getDims().ndims(); d++) layer_data_dump << parentEdge->getDims()[d] << " "; layer_data_dump << "(" << src_d.nelems() << ")"<< std::endl; - for (size_t i = 0; i < src_d.nelems(); i++) { - layer_data_dump << std::fixed << std::setprecision(3) << data[src_d.off_l(i)] << std::endl; + auto precision = graphNodes[i]->getParentEdges()[p].lock()->outputDesc.getPrecision(); + if (precision == Precision::FP32) { + float *data = static_cast<float *>(graphNodes[i]->getParentEdges()[p].lock()->getMemory().GetData()); + for (size_t bs = 0; bs < dataSize; bs++) { + layer_data_dump << std::fixed << std::setprecision(3) << data[src_d.off_l(bs)] << std::endl; + } + } else if (precision == Precision::I8) { + int8_t *data = static_cast<int8_t *>(graphNodes[i]->getParentEdges()[p].lock()->getMemory().GetData()); + for (size_t bs = 0; bs < dataSize; bs++) { + layer_data_dump << static_cast<int>(data[src_d.off_l(bs)]) << std::endl; + } + } else if (graphNodes[i]->getParentEdges()[p].lock()->outputDesc.getPrecision() == Precision::U8) { + uint8_t *data = static_cast<uint8_t *>(graphNodes[i]->getParentEdges()[p].lock()->getMemory().GetData()); + for (size_t bs = 0; bs < dataSize; bs++) { + layer_data_dump << static_cast<int>(data[src_d.off_l(bs)]) << std::endl; + } + } else if (graphNodes[i]->getParentEdges()[p].lock()->outputDesc.getPrecision() == Precision::I32) { + int32_t *data = static_cast<int32_t *>(graphNodes[i]->getParentEdges()[p].lock()->getMemory().GetData()); + for (size_t bs = 0; bs < dataSize; bs++) { + layer_data_dump << static_cast<int>(data[src_d.off_l(bs)]) << std::endl; + } + } else { + layer_data_dump << "Unsupported precision: " << precision.name() << std::endl; } + layer_data_dump.close(); } else { std::cout << "Cannot create file " << fname << std::endl; @@ -874,16 +1025,35 @@ void MKLDNNGraph::Infer(int batch) { GenericLayer* genericLayer = dynamic_cast<GenericLayer*>(graphNodes[i]->getCnnLayer().get()); if (genericLayer != nullptr) { for (auto blob : genericLayer->blobs) { - layer_data_dump.open(folderName + nodeName + "_" + blob.first + ".txt"); + layer_data_dump.open(folderName + nodeName + "_blob-" + blob.first + ".txt"); if (layer_data_dump.is_open()) { layer_data_dump << "shape: "; for (size_t d = 0; d < blob.second->dims().size(); d++) layer_data_dump << blob.second->dims()[d] << " "; layer_data_dump << "(" << blob.second->size() << ")"<< std::endl; + if (blob.second->getTensorDesc().getPrecision() == Precision::FP32) { float *data = blob.second->buffer(); for (size_t bs = 0; bs < blob.second->size(); bs++) { layer_data_dump << std::fixed << std::setprecision(3) << data[bs] << std::endl; } + } else if (blob.second->getTensorDesc().getPrecision() == Precision::I8) { + int8_t *data = blob.second->buffer(); + for (size_t bs = 0; bs < blob.second->size(); bs++) { + layer_data_dump << static_cast<int>(data[bs]) << std::endl; + } + } else if (blob.second->getTensorDesc().getPrecision() == Precision::U8) { + uint8_t *data = blob.second->buffer(); + for (size_t bs = 0; bs < blob.second->size(); bs++) { + layer_data_dump << static_cast<int>(data[bs]) << std::endl; + } + } else if (blob.second->getTensorDesc().getPrecision() == Precision::I32) { + int32_t *data = blob.second->buffer(); + for (size_t bs = 0; bs < blob.second->size(); bs++) { + layer_data_dump << static_cast<int>(data[bs]) << std::endl; + } + } else { + layer_data_dump << "Unsupported precision: " << blob.second->getTensorDesc().getPrecision().name() << std::endl; + } layer_data_dump.close(); } else { std::cout << "Cannot create file " << folderName << nodeName @@ -901,14 +1071,13 @@ MKLDNNNodePtr MKLDNNGraph::FindNodeWithName(const std::string& name) const { return std::shared_ptr<MKLDNNNode>(); } - auto childs = graphNodes; - - auto node = std::find_if(childs.begin(), childs.end(), + const auto children = graphNodes; + const auto node = std::find_if(children.begin(), children.end(), [&name](MKLDNNNodePtr const& item) { return item->getName() == name; }); - return (node == childs.end() ? std::shared_ptr<MKLDNNNode>() : *node); + return (node == children.end() ? std::shared_ptr<MKLDNNNode>() : *node); } void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sortedNodes) { @@ -1014,6 +1183,80 @@ void MKLDNNGraph::getOutputBlobs(InferenceEngine::BlobMap &resp) { } } +void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) { + auto removeEdge = [](MKLDNNGraph &graph, MKLDNNEdgePtr& edge) { + auto& edges = graph.GetEdges(); + for (auto it = edges.begin(); it != edges.end(); it++) { + if ((*it) == edge) { + edges.erase(it); + return; + } + } + }; + for (size_t i = 0; i < node->parentEdges.size(); i++) { + if (!node->parentEdges[i].lock()) + continue; + auto parent = node->parentEdges[i].lock()->getParent(); + if (!parent) + continue; + + for (size_t j = 0; j < node->childEdges.size(); j++) { + if (!node->childEdges[j].lock()) + continue; + auto child = node->childEdges[j].lock()->getChild(); + if (!child) + continue; + + MKLDNNEdgePtr remEdge = node->parentEdges[i].lock(); + int inNum = 0; + if (remEdge) { + inNum = remEdge->getInputNum(); + node->removeEdge(remEdge); + removeEdge(*this, remEdge); + } + inNum += j; + remEdge = node->childEdges[j].lock(); + int outNum = 0; + if (remEdge) { + outNum = remEdge->getOutputNum(); + node->removeEdge(remEdge); + removeEdge(*this, remEdge); + } + MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child)); + this->GetEdges().push_back(newEdge); + parent->addEdge(newEdge, outNum, inNum); + } + } +} + +void MKLDNNGraph::RemoveDroppedNodes() { + auto& nodes = this->GetNodes(); + + auto it = nodes.begin(); + + while (it != nodes.end()) { + if ((*it)->isDropped()) { + it = nodes.erase(it); + } else { + it++; + } + } +} + +void MKLDNNGraph::RemoveDroppedEdges() { + auto& edges = this->GetEdges(); + + auto it = edges.begin(); + + while (it != edges.end()) { + if ((*it)->isDropped()) { + it = edges.erase(it); + } else { + it++; + } + } +} + bool MKLDNNExecNetwork::CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const { InputsDataMap inputs; network.getInputsInfo(inputs); @@ -1081,7 +1324,21 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(InferenceEngine::ICNNNetwork &network, // initialization in taskExecutor thread auto task = std::make_shared<InferenceEngine::Task>([&]() { - graph->CreateGraph(network, extensionManager); + // we are cloning network if we have statistics and we can transform network + // in other case we pass original network. Especially because LSTM networks + // are not cloned properly + ICNNNetworkStats* pstats = nullptr; + StatusCode s = network.getStats(&pstats, nullptr); + Xbyak::util::Cpu cpu; + // Enable int8 only for avx512 + if (s == StatusCode::OK && pstats && !pstats->isEmpty() && cpu.has(Xbyak::util::Cpu::tAVX512F)) { + details::CNNNetworkImplPtr clonnedNetwork = cloneNet(network); + CNNNetworkInt8Normalizer cnnorm; + cnnorm.NormalizeNetwork(*clonnedNetwork, *pstats); + graph->CreateGraph(*clonnedNetwork, extensionManager); + } else { + graph->CreateGraph(network, extensionManager); + } }); _taskExecutor->startTask(task); |