diff options
Diffstat (limited to 'runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc')
-rw-r--r-- | runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc | 233 |
1 file changed, 233 insertions, 0 deletions
diff --git a/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc b/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc new file mode 100644 index 000000000..4e70f6319 --- /dev/null +++ b/runtime/neurun/backend/srcn/kernel/ConvolutionLayer.cc @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConvolutionLayer.h" + +#include "OperationUtils.h" +#include <ncnn/srcn/srcn_conv.h> + +namespace neurun +{ +namespace backend +{ +namespace srcn +{ +namespace kernel +{ + +ConvolutionLayer::ConvolutionLayer() + : _inputData(), _kernelData(), _biasData(), _outputData(), _inputDescr(), _kernelDescr(), + _biasDescr(), _outputDescr(), _paddingType(0), _paddingLeft(0), _paddingTop(0), + _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), + _inputType(OperandType::FLOAT32), _layout(nnfw::srcn::col_major), _winograd_weights(nullptr), + _sparse_weights(nullptr) +{ + // DO NOTHING +} + +ConvolutionLayer::~ConvolutionLayer() +{ + // TODO Move managing constant _winograd_data and sparse + nnfw::srcn::winograd_release(_winograd_weights); + size_t depth_index = _layout == nnfw::srcn::col_major ? 
3 : 1; + nnfw::srcn::sparse_release(_outputDescr.dimensions[depth_index], _sparse_weights); +} + +void ConvolutionLayer::convFloat32() +{ + nnfw::srcn::convMat_t in_mat, out_mat, kernel_mat; + nnfw::srcn::convParams_t in_param; + + assert(_layout == nnfw::srcn::col_major || _layout == nnfw::srcn::row_major); + size_t height_index = _layout == nnfw::srcn::col_major ? 1 : 2; + size_t width_index = _layout == nnfw::srcn::col_major ? 2 : 3; + size_t depth_index = _layout == nnfw::srcn::col_major ? 3 : 1; + size_t kernel_input_depth_index = _layout == nnfw::srcn::col_major ? 3 : 1; + size_t kernel_output_depth_index = 0; + + const int batches = MatchingDim(_inputDescr, 0, _outputDescr, 0); + const int input_height = _inputDescr.dimensions[height_index]; + const int input_width = _inputDescr.dimensions[width_index]; + const int input_depth = + MatchingDim(_inputDescr, depth_index, _kernelDescr, kernel_input_depth_index); + in_mat.c = input_depth; + in_mat.w = input_width; + in_mat.h = input_height; + in_mat.n = batches; + in_mat.data = _inputData.f; + + const int output_height = _outputDescr.dimensions[height_index]; + const int output_width = _outputDescr.dimensions[width_index]; + const int output_depth = + MatchingDim(_kernelDescr, kernel_output_depth_index, _outputDescr, depth_index); + out_mat.c = output_depth; + out_mat.w = output_width; + out_mat.h = output_height; + out_mat.n = batches; + out_mat.data = _outputData.f; + + const int outch = _kernelDescr.dimensions[kernel_output_depth_index]; + const int inch = _kernelDescr.dimensions[kernel_input_depth_index]; + const int ker_height = _kernelDescr.dimensions[height_index]; + const int ker_width = _kernelDescr.dimensions[width_index]; + kernel_mat.c = input_depth; + kernel_mat.w = ker_width; + kernel_mat.h = ker_height; + kernel_mat.n = output_depth; + kernel_mat.data = _kernelData.f; + + in_param.kernel_w = ker_width; + in_param.kernel_h = ker_height; + in_param.stride_w = _strideWidth; + in_param.stride_h = 
_strideHeight; + in_param.padding = _paddingType; + in_param.pad_w = _paddingLeft; + in_param.pad_h = _paddingTop; + in_param.dilation_w = 1; + in_param.dilation_h = 1; + + nnfw::srcn::winogradParams_t winograd_param; + winograd_param.kernel_w = ker_width; + winograd_param.kernel_h = ker_height; + winograd_param.stride_w = _strideWidth; + winograd_param.stride_h = _strideHeight; + winograd_param.dilation_w = 1; + winograd_param.dilation_h = 1; + winograd_param.batch = batches; + winograd_param.w = ker_width; + winograd_param.h = ker_height; + winograd_param.inch = inch; + winograd_param.outch = outch; + winograd_param.num_threads = 4; + + winograd_param.conv_type = _layout; + winograd_param.weight_data = _kernelData.f; + + // Without winograd + if (nnfw::srcn::check_winograd(winograd_param)) + { + _winograd_weights = nnfw::srcn::trans_weight2winograd(winograd_param, nullptr); + } + _sparse_weights = nnfw::srcn::trans_weight2sparse(kernel_mat); + + nnfw::srcn::srcn_convolution2D(in_mat, kernel_mat, out_mat, in_param, _winograd_weights, 4, + _layout); + + // Add biases + if (_biasData.f == nullptr) + { + return; + } + // TODO Optimize + uint32_t strides[4] = { + _outputDescr.dimensions[1] * _outputDescr.dimensions[2] * _outputDescr.dimensions[3], + _outputDescr.dimensions[2] * _outputDescr.dimensions[3], _outputDescr.dimensions[3], 1}; + if (_layout == nnfw::srcn::convType_t::col_major) + { + for (uint32_t c = 0; c < _outputDescr.dimensions[3]; ++c) + { + if (_biasData.f[c] != 0) + { + for (uint32_t b = 0; b < _outputDescr.dimensions[0]; ++b) + { + for (uint32_t h = 0; h < _outputDescr.dimensions[1]; ++h) + { + for (uint32_t w = 0; w < _outputDescr.dimensions[2]; ++w) + { + _outputData.f[b * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]] += + _biasData.f[c]; + } + } + } + } + } + } + else if (_layout == nnfw::srcn::convType_t::row_major) + { + for (uint32_t c = 0; c < _outputDescr.dimensions[1]; ++c) + { + if (_biasData.f[c] != 0) + { + for (uint32_t 
b = 0; b < _outputDescr.dimensions[0]; ++b) + { + for (uint32_t h = 0; h < _outputDescr.dimensions[2]; ++h) + { + for (uint32_t w = 0; w < _outputDescr.dimensions[3]; ++w) + { + _outputData.f[b * strides[0] + c * strides[1] + h * strides[2] + w * strides[3]] += + _biasData.f[c]; + } + } + } + } + } + } + else + { + throw std::runtime_error("Wrong Layout"); + } +} + +void ConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, + uint8_t *kernelData, const TensorDescriptor kernelDescr, + uint8_t *biasData, const TensorDescriptor biasDescr, + const uint32_t paddingType, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, uint8_t *outputData, + const TensorDescriptor outputDescr, ir::Layout layout) +{ + assert(inputDescr.dimensions.size() == 4); + assert(kernelDescr.dimensions.size() == 4); + assert(biasDescr.dimensions.size() == 1); + assert(outputDescr.dimensions.size() == 4); + assert(inputDescr.type == kernelDescr.type && inputDescr.type == outputDescr.type); + // TODO Add assertions validating height and width with padding + _layout = convertLayout(layout); + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _kernelData.u8 = kernelData; + _kernelDescr = kernelDescr; + _biasData.u8 = biasData; + _biasDescr = biasDescr; + _paddingType = paddingType; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void ConvolutionLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + convFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + throw std::runtime_error("NYI"); + } +} + +} // namespace kernel +} // namespace srcn +} // namespace backend +} // namespace 
neurun |