diff options
Diffstat (limited to 'runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc')
-rw-r--r-- | runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc new file mode 100644 index 000000000..81e88e0f0 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConvolutionLayer.h" + +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "kernel/cpu/OperationUtils.h" + +#include <mutex> + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +// If possible we will use this static buffer for the tensor. +static constexpr int kStaticBufferSize = 1605632; +static char static_scratch_buffer[kStaticBufferSize]; +static std::mutex executionMutex; + +#define ANDROID_NN_CONV_PARAMETERS(Type) \ + uint32_t height = getSizeOfDimension(_inputShape, 1); \ + uint32_t width = getSizeOfDimension(_inputShape, 2); \ + uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \ + uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \ + uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ + uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ + uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \ + \ + uint32_t paddingHeight = (uint32_t)_paddingTop; \ + uint32_t paddingWidth = (uint32_t)_paddingLeft; \ + \ + ::tflite::Dims<4> im2colDim; \ + im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \ + im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \ + im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \ + im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \ + \ + im2colDim.strides[0] = 1; \ + for (int i = 1; i < 4; i++) \ + { \ + im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \ + } \ + Type *im2colData = nullptr; \ + uint64_t im2colByteSize = sizeof(Type); \ + std::unique_ptr<Type[]> im2colGuard; \ + for (int i = 0; i < 4; i++) \ + { \ + im2colByteSize *= im2colDim.sizes[i]; \ + } \ + /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \ + if (im2colByteSize >= 0x7fffffff) \ + { \ + std::cout << "Conv size is too large, not enough memory" << std::endl; \ + return false; \ + } \ + if (im2colByteSize <= kStaticBufferSize) \ + { \ + im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ + } \ + else \ + { \ + im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ + if (im2colData == nullptr) \ + { \ + std::cout << "Conv size is too large, not enough memory" << std::endl; \ + return false; \ + } \ + im2colGuard.reset(im2colData); \ + } + +ConvolutionLayer::ConvolutionLayer() + : _inputData(nullptr), _kernelData(nullptr), _outputData(nullptr), _biasData(nullptr), + _inputShape(), _kernelShape(), _outputShape(), _biasShape(), _paddingLeft(0), _paddingTop(0), + _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), + _activation(ANEURALNETWORKS_FUSED_NONE), _inputType(OperandType::SCALAR_FLOAT32) +{ + // DO NOTHING +} + +bool ConvolutionLayer::convFloat32() +{ + ANDROID_NN_CONV_PARAMETERS(float) + + const ::tflite::Dims<4> &kernel_dim = convertShapeToDims(_kernelShape); + const int kernel_width = ArraySize(kernel_dim, 1); + const int kernel_height = ArraySize(kernel_dim, 2); + const bool need_im2col = + _strideWidth != 1 || _strideHeight != 1 || kernel_width != 1 || kernel_height != 1; + + float *im2colDataToPass = nullptr; + if (need_im2col) + { + im2colDataToPass = im2colData; + } + + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + int32_t dilationWidthFactor = 1, dilationHeightFactor = 1; + ::tflite::optimized_ops::Conv( + reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), + reinterpret_cast<const float *>(_kernelData), convertShapeToDims(_kernelShape), + reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), _strideWidth, + _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight, + output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), + convertShapeToDims(_outputShape), im2colDataToPass, im2colDim); + return true; +} + +bool ConvolutionLayer::convQuant8() +{ + ANDROID_NN_CONV_PARAMETERS(uint8_t) + int32_t inputOffset = -_inputShape.offset; + int32_t kernelOffset = -_kernelShape.offset; + int32_t outputOffset = _outputShape.offset; + float real_multiplier = 0.0; + int32_t output_multiplier = 0; + int32_t output_shift = 0; + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + if (!GetQuantizedConvolutionMultipler(_inputShape, _kernelShape, _biasShape, _outputShape, + &real_multiplier) || + !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift)) + { + return false; + } + CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, + &output_activation_max); + static gemmlowp::GemmContext gemm_context; + // Prevent concurrent executions that may access the scratch buffer and + // gemm_context. + std::unique_lock<std::mutex> lock(executionMutex); + // Alow gemmlowp automatically decide how many threads to use. + gemm_context.set_max_num_threads(0); + ::tflite::optimized_ops::Conv( + _inputData, convertShapeToDims(_inputShape), inputOffset, _kernelData, + convertShapeToDims(_kernelShape), kernelOffset, reinterpret_cast<const int32_t *>(_biasData), + convertShapeToDims(_biasShape), _strideWidth, _strideHeight, paddingWidth, paddingHeight, + outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max, + _outputData, convertShapeToDims(_outputShape), im2colData, im2colDim, &gemm_context); + return true; +} + +void ConvolutionLayer::configure(uint8_t *inputData, const Shape inputShape, uint8_t *kernelData, + const Shape kernelShape, uint8_t *biasData, const Shape biasShape, + const uint32_t paddingLeft, const uint32_t paddingRight, + const uint32_t paddingTop, const uint32_t paddingBottom, + const uint32_t strideWidth, const uint32_t strideHeight, + const FuseCode activation, uint8_t *outputData, + const Shape outputShape) +{ + _inputData = inputData; + _inputShape = inputShape; + _inputType = inputShape.type; + _kernelData = kernelData; + _kernelShape = kernelShape; + _biasData = biasData; + _biasShape = biasShape; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _activation = activation; + _outputData = outputData; + _outputShape = outputShape; +} + +void ConvolutionLayer::run() +{ + if (_inputType == OperandType::TENSOR_FLOAT32) + { + convFloat32(); + } + else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM) + { + throw std::runtime_error{"ConvolutionLayer : Not tested for TENSOR_QUANT8_ASYMM"}; + // convQuant8(); + } +} + +#undef ANDROID_NN_CONV_PARAMETERS + +} // namespace cpu +} // namespace kernel +} // namespace neurun |