diff options
author | Alexey Suhov <asuhov@users.noreply.github.com> | 2018-11-23 16:19:43 +0300 |
---|---|---|
committer | openvino-pushbot <44090433+openvino-pushbot@users.noreply.github.com> | 2018-11-23 16:19:43 +0300 |
commit | 55a41d7570f78aaea0d6764d157dd7434730d56f (patch) | |
tree | ba022c71609b93d51119bcb25e5ccb8c7147dbd3 /inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp | |
parent | 54eab180361ec09fbd82e2bb62adfeb521275774 (diff) | |
download | dldt-55a41d7570f78aaea0d6764d157dd7434730d56f.tar.gz dldt-55a41d7570f78aaea0d6764d157dd7434730d56f.tar.bz2 dldt-55a41d7570f78aaea0d6764d157dd7434730d56f.zip |
Publishing R4 (#41)
* Publishing R4
Diffstat (limited to 'inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp')
-rw-r--r-- | inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp | 96 |
1 files changed, 34 insertions, 62 deletions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 6335ce168..1295e05a5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -11,6 +11,7 @@ #include <vector> #include <mkldnn_types.h> #include <mkldnn_extension_utils.h> +#include "ie_parallel.hpp" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -36,9 +37,9 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); if (getParentEdges().size() != 1) - THROW_IE_EXCEPTION << "Incorrect number of input edges."; + THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) - THROW_IE_EXCEPTION << "Incorrect number of output edges."; + THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName(); auto * deconvLayer = dynamic_cast<DeconvolutionLayer*>(getCnnLayer().get()); if (deconvLayer == nullptr) @@ -66,24 +67,27 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { deconvLayer->_group, deconvLayer->input()->getTensorDesc().getDims()[1] / deconvLayer->_group, deconvLayer->_out_depth / deconvLayer->_group, - deconvLayer->_kernel_y, - deconvLayer->_kernel_x + deconvLayer->_kernel[Y_AXIS], + deconvLayer->_kernel[X_AXIS] }; + groupNum = deconvLayer->_group; } else { weightDims = { deconvLayer->input()->getTensorDesc().getDims()[1], deconvLayer->_out_depth, - deconvLayer->_kernel_y, - deconvLayer->_kernel_x + deconvLayer->_kernel[Y_AXIS], + deconvLayer->_kernel[X_AXIS] }; } internalBlobs.push_back(createInternalBlob(weightDims, true)); - stride = {static_cast<int>(deconvLayer->_stride_y), static_cast<int>(deconvLayer->_stride_x)}; - paddingL = {static_cast<int>(deconvLayer->_padding_y), static_cast<int>(deconvLayer->_padding_x)}; - dilation = {static_cast<int>(deconvLayer->_dilation_y) - 1, static_cast<int>(deconvLayer->_dilation_x) - 1}; - paddingR = {0, 0}; + invertVectorCopyUtoI(deconvLayer->_stride, stride); + for (int i = 1; i <= deconvLayer->_dilation.size(); i++) { + dilation.push_back(static_cast<int>(deconvLayer->_dilation[deconvLayer->_dilation.size() - i]) - 1); + } + invertVectorCopyUtoI(deconvLayer->_padding, paddingL); + invertVectorCopyUtoI(deconvLayer->_pads_end, paddingR); weightsDims = MKLDNNDims(weightDims); @@ -112,36 +116,30 @@ void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) { if (withBiases) { const auto *bias = biases->buffer().as<const float*>(); - auto& dst = getChildEdgeAt(0)->getMemory(); - - float *output = reinterpret_cast<float*>(dst.GetData()) + - dst.GetDescriptor().data.layout_desc.blocking.offset_padding; - - const auto &dims = dst.GetDims(); + auto dst = getChildEdgeAt(0)->getBlob(); - const int N = dims[0]; - const int C = dims[1]; - const int H = dims[2]; - const int W = dims[3]; + float *output = dst->buffer().as<float *>() + dst->getTensorDesc().getBlockingDesc().getOffsetPadding(); - memory::format fmt = MKLDNNMemoryDesc(getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc).getFormat(); + const size_t N = dst->getTensorDesc().getDims()[0]; + const size_t C = dst->getTensorDesc().getBlockingDesc().getBlockDims()[1] / groupNum; + const size_t H = dst->getTensorDesc().getDims()[2]; + const size_t W = dst->getTensorDesc().getDims()[3]; + const size_t blkC = + dst->getTensorDesc().getBlockingDesc().getBlockDims().size() > 4 ? + dst->getTensorDesc().getBlockingDesc().getBlockDims()[4] : + 1; - const int blksize = fmt == memory::nChw16c ? 16 : - fmt == memory::nChw8c ? 8 : 1; + auto strides = dst->getTensorDesc().getBlockingDesc().getStrides(); -# pragma omp parallel for collapse(4) schedule(static) - for (int n = 0; n < N; ++n) - for (int c = 0; c < C / blksize; ++c) - for (int h = 0; h < H; ++h) - for (int w = 0; w < W; ++w) { - const int off = - n * C * H * W + c * H * W * blksize + - h * W * blksize + w * blksize; - auto o = &output[off]; - for (int bc = 0; bc < blksize; ++bc) { - o[bc] += bias[c*blksize + bc]; + parallel_for4d(N, C, H, W, [&](size_t n, size_t c, size_t h, size_t w) { + for (size_t g = 0; g < groupNum; g++) { + const size_t off = n * strides[0] + (g * C + c) * strides[1] + h * strides[2] + w * strides[3]; + auto o = &output[off]; + for (int bc = 0; bc < blkC; ++bc) { + o[bc] += bias[c * blkC + bc]; + } } - } + }); } } @@ -175,33 +173,7 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<InferenceEngine if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended())) return; - MKLDNNDims blocked_weightDims(weightsDims); - - if (!withGroups) { - if (in_fmt == memory::nChw16c) { - blocked_weightDims[I_IND] = rnd_up(blocked_weightDims[I_IND], 16); - } else if (in_fmt == memory::nChw8c) { - blocked_weightDims[I_IND] = rnd_up(blocked_weightDims[I_IND], 8); - } - - if (out_fmt == memory::nChw16c) { - blocked_weightDims[O_IND] = rnd_up(blocked_weightDims[O_IND], 16); - } else if (out_fmt == memory::nChw8c) { - blocked_weightDims[O_IND] = rnd_up(blocked_weightDims[O_IND], 8); - } - } else if (isDW) { - if (out_fmt != in_fmt) - return; - - if (in_fmt == memory::nChw16c) { - blocked_weightDims[0] = rnd_up(blocked_weightDims[0], 16); - } else if (in_fmt == memory::nChw8c) { - blocked_weightDims[0] = rnd_up(blocked_weightDims[0], 8); - } - } - - MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, in_candidate.getDataType(), memory::any}; - + MKLDNNMemoryDesc wgh_candidate{weightsDims, in_candidate.getDataType(), memory::any}; for (auto alg : {algorithm::convolution_winograd, algorithm::convolution_direct}) { try { std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc; |