summaryrefslogtreecommitdiff
path: root/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc')
-rw-r--r--runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc233
1 files changed, 233 insertions, 0 deletions
diff --git a/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc b/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc
new file mode 100644
index 000000000..4e70f6319
--- /dev/null
+++ b/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "OperationUtils.h"
+#include <ncnn/srcn/srcn_conv.h>
+
+namespace neurun
+{
+namespace backend
+{
+namespace srcn
+{
+namespace kernel
+{
+
+ConvolutionLayer::ConvolutionLayer()
+ : _inputData(), _kernelData(), _biasData(), _outputData(), _inputDescr(), _kernelDescr(),
+ _biasDescr(), _outputDescr(), _paddingType(0), _paddingLeft(0), _paddingTop(0),
+ _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
+ _inputType(OperandType::FLOAT32), _layout(nnfw::srcn::col_major), _winograd_weights(nullptr),
+ _sparse_weights(nullptr)
+{
+ // DO NOTHING
+}
+
+ConvolutionLayer::~ConvolutionLayer()
+{
+ // TODO Move managing constant _winograd_data and sparse
+ nnfw::srcn::winograd_release(_winograd_weights);
+ size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
+ nnfw::srcn::sparse_release(_outputDescr.dimensions[depth_index], _sparse_weights);
+}
+
+void ConvolutionLayer::convFloat32()
+{
+ nnfw::srcn::convMat_t in_mat, out_mat, kernel_mat;
+ nnfw::srcn::convParams_t in_param;
+
+ assert(_layout == nnfw::srcn::col_major || _layout == nnfw::srcn::row_major);
+ size_t height_index = _layout == nnfw::srcn::col_major ? 1 : 2;
+ size_t width_index = _layout == nnfw::srcn::col_major ? 2 : 3;
+ size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
+ size_t kernel_input_depth_index = _layout == nnfw::srcn::col_major ? 3 : 1;
+ size_t kernel_output_depth_index = 0;
+
+ const int batches = MatchingDim(_inputDescr, 0, _outputDescr, 0);
+ const int input_height = _inputDescr.dimensions[height_index];
+ const int input_width = _inputDescr.dimensions[width_index];
+ const int input_depth =
+ MatchingDim(_inputDescr, depth_index, _kernelDescr, kernel_input_depth_index);
+ in_mat.c = input_depth;
+ in_mat.w = input_width;
+ in_mat.h = input_height;
+ in_mat.n = batches;
+ in_mat.data = _inputData.f;
+
+ const int output_height = _outputDescr.dimensions[height_index];
+ const int output_width = _outputDescr.dimensions[width_index];
+ const int output_depth =
+ MatchingDim(_kernelDescr, kernel_output_depth_index, _outputDescr, depth_index);
+ out_mat.c = output_depth;
+ out_mat.w = output_width;
+ out_mat.h = output_height;
+ out_mat.n = batches;
+ out_mat.data = _outputData.f;
+
+ const int outch = _kernelDescr.dimensions[kernel_output_depth_index];
+ const int inch = _kernelDescr.dimensions[kernel_input_depth_index];
+ const int ker_height = _kernelDescr.dimensions[height_index];
+ const int ker_width = _kernelDescr.dimensions[width_index];
+ kernel_mat.c = input_depth;
+ kernel_mat.w = ker_width;
+ kernel_mat.h = ker_height;
+ kernel_mat.n = output_depth;
+ kernel_mat.data = _kernelData.f;
+
+ in_param.kernel_w = ker_width;
+ in_param.kernel_h = ker_height;
+ in_param.stride_w = _strideWidth;
+ in_param.stride_h = _strideHeight;
+ in_param.padding = _paddingType;
+ in_param.pad_w = _paddingLeft;
+ in_param.pad_h = _paddingTop;
+ in_param.dilation_w = 1;
+ in_param.dilation_h = 1;
+
+ nnfw::srcn::winogradParams_t winograd_param;
+ winograd_param.kernel_w = ker_width;
+ winograd_param.kernel_h = ker_height;
+ winograd_param.stride_w = _strideWidth;
+ winograd_param.stride_h = _strideHeight;
+ winograd_param.dilation_w = 1;
+ winograd_param.dilation_h = 1;
+ winograd_param.batch = batches;
+ winograd_param.w = ker_width;
+ winograd_param.h = ker_height;
+ winograd_param.inch = inch;
+ winograd_param.outch = outch;
+ winograd_param.num_threads = 4;
+
+ winograd_param.conv_type = _layout;
+ winograd_param.weight_data = _kernelData.f;
+
+ // Without winograd
+ if (nnfw::srcn::check_winograd(winograd_param))
+ {
+ _winograd_weights = nnfw::srcn::trans_weight2winograd(winograd_param, nullptr);
+ }
+ _sparse_weights = nnfw::srcn::trans_weight2sparse(kernel_mat);
+
+ nnfw::srcn::srcn_convolution2D(in_mat, kernel_mat, out_mat, in_param, _winograd_weights, 4,
+ _layout);
+
+ // Add biases
+ if (_biasData.f == nullptr)
+ {
+ return;
+ }
+ // TODO Optimize
+ uint32_t strides[4] = {
+ _outputDescr.dimensions[1] * _outputDescr.dimensions[2] * _outputDescr.dimensions[3],
+ _outputDescr.dimensions[2] * _outputDescr.dimensions[3], _outputDescr.dimensions[3], 1};
+ if (_layout == nnfw::srcn::convType_t::col_major)
+ {
+ for (uint32_t c = 0; c < _outputDescr.dimensions[3]; ++c)
+ {
+ if (_biasData.f[c] != 0)
+ {
+ for (uint32_t b = 0; b < _outputDescr.dimensions[0]; ++b)
+ {
+ for (uint32_t h = 0; h < _outputDescr.dimensions[1]; ++h)
+ {
+ for (uint32_t w = 0; w < _outputDescr.dimensions[2]; ++w)
+ {
+ _outputData.f[b * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]] +=
+ _biasData.f[c];
+ }
+ }
+ }
+ }
+ }
+ }
+ else if (_layout == nnfw::srcn::convType_t::row_major)
+ {
+ for (uint32_t c = 0; c < _outputDescr.dimensions[1]; ++c)
+ {
+ if (_biasData.f[c] != 0)
+ {
+ for (uint32_t b = 0; b < _outputDescr.dimensions[0]; ++b)
+ {
+ for (uint32_t h = 0; h < _outputDescr.dimensions[2]; ++h)
+ {
+ for (uint32_t w = 0; w < _outputDescr.dimensions[3]; ++w)
+ {
+ _outputData.f[b * strides[0] + c * strides[1] + h * strides[2] + w * strides[3]] +=
+ _biasData.f[c];
+ }
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Wrong Layout");
+ }
+}
+
void ConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr,
                                 uint8_t *kernelData, const TensorDescriptor kernelDescr,
                                 uint8_t *biasData, const TensorDescriptor biasDescr,
                                 const uint32_t paddingType, const uint32_t paddingLeft,
                                 const uint32_t paddingRight, const uint32_t paddingTop,
                                 const uint32_t paddingBottom, const uint32_t strideWidth,
                                 const uint32_t strideHeight, uint8_t *outputData,
                                 const TensorDescriptor outputDescr, ir::Layout layout)
{
  // Captures raw tensor pointers and convolution parameters for later use by
  // run(). Tensor data is NOT copied: the caller keeps ownership of
  // inputData/kernelData/biasData/outputData, which must stay valid until the
  // layer has finished running.
  //
  // Input, kernel and output are required to be rank-4 and share one operand
  // type; bias is rank-1 (biasData may still be nullptr — convFloat32 skips
  // the bias step in that case).
  assert(inputDescr.dimensions.size() == 4);
  assert(kernelDescr.dimensions.size() == 4);
  assert(biasDescr.dimensions.size() == 1);
  assert(outputDescr.dimensions.size() == 4);
  assert(inputDescr.type == kernelDescr.type && inputDescr.type == outputDescr.type);
  // TODO Add assertions validating height and width with padding
  _layout = convertLayout(layout); // ir::Layout -> nnfw::srcn::convType_t
  _inputData.u8 = inputData;
  _inputDescr = inputDescr;
  _inputType = inputDescr.type; // drives the FLOAT32/QUANT8 dispatch in run()
  _kernelData.u8 = kernelData;
  _kernelDescr = kernelDescr;
  _biasData.u8 = biasData;
  _biasDescr = biasDescr;
  _paddingType = paddingType;
  _paddingLeft = paddingLeft;
  // NOTE(review): _paddingRight/_paddingBottom are stored but never read in
  // this file — confirm whether the backend derives them internally.
  _paddingRight = paddingRight;
  _paddingTop = paddingTop;
  _paddingBottom = paddingBottom;
  _strideWidth = strideWidth;
  _strideHeight = strideHeight;
  _outputData.u8 = outputData;
  _outputDescr = outputDescr;
}
+
+void ConvolutionLayer::run()
+{
+ if (_inputType == OperandType::FLOAT32)
+ {
+ convFloat32();
+ }
+ else if (_inputType == OperandType::QUANT8_ASYMM)
+ {
+ throw std::runtime_error("NYI");
+ }
+}
+
+} // namespace kernel
+} // namespace srcn
+} // namespace backend
+} // namespace neurun