Publishing R4 (#41)

* Publishing R4
author: Alexey Suhov <asuhov@users.noreply.github.com> 2018-11-23 16:19:43 +0300
committer: openvino-pushbot <44090433+openvino-pushbot@users.noreply.github.com> 2018-11-23 16:19:43 +0300
commit: 55a41d7570f78aaea0d6764d157dd7434730d56f (patch)
tree: ba022c71609b93d51119bcb25e5ccb8c7147dbd3 /inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
parent: 54eab180361ec09fbd82e2bb62adfeb521275774 (diff)
download: dldt-55a41d7570f78aaea0d6764d157dd7434730d56f.tar.gz
dldt-55a41d7570f78aaea0d6764d157dd7434730d56f.tar.bz2
dldt-55a41d7570f78aaea0d6764d157dd7434730d56f.zip
1 files changed, 34 insertions, 62 deletions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
index 6335ce168..1295e05a5 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
@@ -11,6 +11,7 @@
 #include <vector>
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
+#include "ie_parallel.hpp"
 
 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -36,9 +37,9 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
     auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
 
     if (getParentEdges().size() != 1)
-        THROW_IE_EXCEPTION << "Incorrect number of input edges.";
+        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
     if (getChildEdges().empty())
-        THROW_IE_EXCEPTION << "Incorrect number of output edges.";
+        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
 
     auto * deconvLayer = dynamic_cast<DeconvolutionLayer*>(getCnnLayer().get());
     if (deconvLayer == nullptr)
@@ -66,24 +67,27 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
                 deconvLayer->_group,
                 deconvLayer->input()->getTensorDesc().getDims()[1] / deconvLayer->_group,
                 deconvLayer->_out_depth / deconvLayer->_group,
-                deconvLayer->_kernel_y,
-                deconvLayer->_kernel_x
+                deconvLayer->_kernel[Y_AXIS],
+                deconvLayer->_kernel[X_AXIS]
         };
+        groupNum = deconvLayer->_group;
     } else {
         weightDims = {
                 deconvLayer->input()->getTensorDesc().getDims()[1],
                 deconvLayer->_out_depth,
-                deconvLayer->_kernel_y,
-                deconvLayer->_kernel_x
+                deconvLayer->_kernel[Y_AXIS],
+                deconvLayer->_kernel[X_AXIS]
         };
     }
 
     internalBlobs.push_back(createInternalBlob(weightDims, true));
 
-    stride = {static_cast<int>(deconvLayer->_stride_y), static_cast<int>(deconvLayer->_stride_x)};
-    paddingL = {static_cast<int>(deconvLayer->_padding_y), static_cast<int>(deconvLayer->_padding_x)};
-    dilation = {static_cast<int>(deconvLayer->_dilation_y) - 1, static_cast<int>(deconvLayer->_dilation_x) - 1};
-    paddingR = {0, 0};
+    invertVectorCopyUtoI(deconvLayer->_stride, stride);
+    for (int i = 1; i <= deconvLayer->_dilation.size(); i++) {
+        dilation.push_back(static_cast<int>(deconvLayer->_dilation[deconvLayer->_dilation.size() - i]) - 1);
+    }
+    invertVectorCopyUtoI(deconvLayer->_padding, paddingL);
+    invertVectorCopyUtoI(deconvLayer->_pads_end, paddingR);
 
     weightsDims = MKLDNNDims(weightDims);
 
@@ -112,36 +116,30 @@ void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) {
     if (withBiases) {
         const auto *bias = biases->buffer().as<const float*>();
 
-        auto& dst = getChildEdgeAt(0)->getMemory();
-
-        float *output = reinterpret_cast<float*>(dst.GetData()) +
-                        dst.GetDescriptor().data.layout_desc.blocking.offset_padding;
-
-        const auto &dims = dst.GetDims();
+        auto dst = getChildEdgeAt(0)->getBlob();
 
-        const int N = dims[0];
-        const int C = dims[1];
-        const int H = dims[2];
-        const int W = dims[3];
+        float *output = dst->buffer().as<float *>() + dst->getTensorDesc().getBlockingDesc().getOffsetPadding();
 
-        memory::format fmt = MKLDNNMemoryDesc(getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc).getFormat();
+        const size_t N = dst->getTensorDesc().getDims()[0];
+        const size_t C = dst->getTensorDesc().getBlockingDesc().getBlockDims()[1] / groupNum;
+        const size_t H = dst->getTensorDesc().getDims()[2];
+        const size_t W = dst->getTensorDesc().getDims()[3];
+        const size_t blkC =
+                dst->getTensorDesc().getBlockingDesc().getBlockDims().size() > 4 ?
+                dst->getTensorDesc().getBlockingDesc().getBlockDims()[4] :
+                1;
 
-        const int blksize = fmt == memory::nChw16c ? 16 :
-                            fmt == memory::nChw8c ? 8 : 1;
+        auto strides = dst->getTensorDesc().getBlockingDesc().getStrides();
 
-#   pragma omp parallel for collapse(4) schedule(static)
-        for (int n = 0; n < N; ++n)
-        for (int c = 0; c < C / blksize; ++c)
-        for (int h = 0; h < H; ++h)
-        for (int w = 0; w < W; ++w) {
-            const int off =
-                    n * C * H * W + c * H * W * blksize +
-                    h * W * blksize + w * blksize;
-            auto o = &output[off];
-            for (int bc = 0; bc < blksize; ++bc) {
-                o[bc] += bias[c*blksize + bc];
+        parallel_for4d(N, C, H, W, [&](size_t n, size_t c, size_t h, size_t w) {
+            for (size_t g = 0; g < groupNum; g++) {
+                const size_t off = n * strides[0] + (g * C + c) * strides[1] + h * strides[2] + w * strides[3];
+                auto o = &output[off];
+                for (int bc = 0; bc < blkC; ++bc) {
+                    o[bc] += bias[c * blkC + bc];
+                }
             }
-        }
+        });
     }
 }
 
@@ -175,33 +173,7 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<InferenceEngine
     if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
         return;
 
-    MKLDNNDims blocked_weightDims(weightsDims);
-
-    if (!withGroups) {
-        if (in_fmt == memory::nChw16c) {
-            blocked_weightDims[I_IND] = rnd_up(blocked_weightDims[I_IND], 16);
-        } else if (in_fmt == memory::nChw8c) {
-            blocked_weightDims[I_IND] = rnd_up(blocked_weightDims[I_IND], 8);
-        }
-
-        if (out_fmt == memory::nChw16c) {
-            blocked_weightDims[O_IND] = rnd_up(blocked_weightDims[O_IND], 16);
-        } else if (out_fmt == memory::nChw8c) {
-            blocked_weightDims[O_IND] = rnd_up(blocked_weightDims[O_IND], 8);
-        }
-    } else if (isDW) {
-        if (out_fmt != in_fmt)
-            return;
-
-        if (in_fmt == memory::nChw16c) {
-            blocked_weightDims[0] = rnd_up(blocked_weightDims[0], 16);
-        } else if (in_fmt == memory::nChw8c) {
-            blocked_weightDims[0] = rnd_up(blocked_weightDims[0], 8);
-        }
-    }
-
-    MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, in_candidate.getDataType(), memory::any};
-
+    MKLDNNMemoryDesc wgh_candidate{weightsDims, in_candidate.getDataType(), memory::any};
     for (auto alg : {algorithm::convolution_winograd, algorithm::convolution_direct}) {
         try {
             std::shared_ptr<mkldnn::convolution_forward::desc> conv_desc;
author	Alexey Suhov <asuhov@users.noreply.github.com>	2018-11-23 16:19:43 +0300
committer	openvino-pushbot <44090433+openvino-pushbot@users.noreply.github.com>	2018-11-23 16:19:43 +0300
commit	55a41d7570f78aaea0d6764d157dd7434730d56f (patch)
tree	ba022c71609b93d51119bcb25e5ccb8c7147dbd3 /inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
parent	54eab180361ec09fbd82e2bb62adfeb521275774 (diff)
download	dldt-55a41d7570f78aaea0d6764d157dd7434730d56f.tar.gz dldt-55a41d7570f78aaea0d6764d157dd7434730d56f.tar.bz2 dldt-55a41d7570f78aaea0d6764d157dd7434730d56f.zip