diff options
author | 오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com> | 2019-04-01 19:08:26 +0900 |
---|---|---|
committer | GitHub Enterprise <noreply-CODE@samsung.com> | 2019-04-01 19:08:26 +0900 |
commit | 2ade35e42320121d583e907d4737fe29a903f6c8 (patch) | |
tree | 1cff9ffec2d42ecb967f12e753cfb92fd5711820 /runtimes | |
parent | ae077ed17e17f5204def511792d81b9c2c603853 (diff) | |
download | nnfw-2ade35e42320121d583e907d4737fe29a903f6c8.tar.gz nnfw-2ade35e42320121d583e907d4737fe29a903f6c8.tar.bz2 nnfw-2ade35e42320121d583e907d4737fe29a903f6c8.zip |
Introduce cpu quant8 convolution kernel (#4910)
Introduce cpu quantized uint8 convolution kernel from tflite and gemmlowp
Use kernel in neurun cpu backend
Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
Diffstat (limited to 'runtimes')
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc | 125 | ||||
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/OperationUtils.cc | 23 | ||||
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/OperationUtils.h | 2 |
3 files changed, 51 insertions, 99 deletions
diff --git a/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc b/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc index 675e05e9e..672ff587f 100644 --- a/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc +++ b/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc @@ -18,12 +18,8 @@ #include <cker/operation/Conv.h> -// TODO : Discard legacy methods -#include "tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h" #include "OperationUtils.h" -#include <mutex> - namespace neurun { namespace backend @@ -32,64 +28,6 @@ namespace cpu { namespace kernel { - -// If possible we will use this static buffer for the tensor. -static constexpr int kStaticBufferSize = 1605632; -static char static_scratch_buffer[kStaticBufferSize]; -static std::mutex executionMutex; - -#define ANDROID_NN_CONV_PARAMETERS(Type) \ - uint32_t height = getSizeOfDimension(_inputShape, 1); \ - uint32_t width = getSizeOfDimension(_inputShape, 2); \ - uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \ - uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \ - uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ - uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ - uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \ - \ - uint32_t paddingHeight = (uint32_t)_paddingTop; \ - uint32_t paddingWidth = (uint32_t)_paddingLeft; \ - \ - tflite::Dims<4> im2colDim; \ - im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \ - im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \ - im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \ - im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \ - \ - im2colDim.strides[0] = 1; \ - for (int i = 1; i < 4; i++) \ - { \ - im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \ - } \ - \ - Type *im2colData = nullptr; \ - uint64_t im2colByteSize = sizeof(Type); \ - std::unique_ptr<Type[]> im2colGuard; \ - for (int i = 0; i < 4; i++) \ - { \ - 
im2colByteSize *= im2colDim.sizes[i]; \ - } \ - /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \ - if (im2colByteSize >= 0x7fffffff) \ - { \ - std::cout << "Conv size is too large, not enough memory" << std::endl; \ - return false; \ - } \ - if (im2colByteSize <= kStaticBufferSize) \ - { \ - im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ - } \ - else \ - { \ - im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ - if (im2colData == nullptr) \ - { \ - std::cout << "Conv size is too large, not enough memory" << std::endl; \ - return false; \ - } \ - im2colGuard.reset(im2colData); \ - } - ConvolutionLayer::ConvolutionLayer() : _inputData(), _kernelData(), _outputData(), _biasData(), _inputShape(), _kernelShape(), _outputShape(), _biasShape(), _paddingLeft(0), _paddingTop(0), _paddingRight(0), @@ -123,52 +61,41 @@ bool ConvolutionLayer::convFloat32() bool ConvolutionLayer::convQuant8() { - ANDROID_NN_CONV_PARAMETERS(uint8_t) + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, + &output_activation_max); - int32_t inputOffset = -_inputShape.offset; - int32_t kernelOffset = -_kernelShape.offset; - int32_t outputOffset = _outputShape.offset; float real_multiplier = 0.0; int32_t output_multiplier = 0; int32_t output_shift = 0; - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - - const ::tflite::Dims<4> &kernel_dim = convertShapeToDims(_kernelShape); - const int kernel_width = ArraySize(kernel_dim, 1); - const int kernel_height = ArraySize(kernel_dim, 2); - const bool need_im2col = - _strideWidth != 1 || _strideHeight != 1 || kernel_width != 1 || kernel_height != 1; - - uint8_t *im2colDataToPass = nullptr; - if (need_im2col) - { - im2colDataToPass = im2colData; - } - if (!GetQuantizedConvolutionMultipler(_inputShape, _kernelShape, _biasShape, _outputShape, &real_multiplier) || - 
!QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift)) + !QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift)) { return false; } - CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, - &output_activation_max); - int32_t dilationWidthFactor = 1, dilationHeightFactor = 1; - - static gemmlowp::GemmContext gemm_context; - // Prevent concurrent executions that may access the scratch buffer and - // gemm_context. - std::unique_lock<std::mutex> lock(executionMutex); - // Alow gemmlowp automatically decide how many threads to use. - gemm_context.set_max_num_threads(0); - tflite::optimized_ops::Conv( - _inputData.u8, convertShapeToDims(_inputShape), inputOffset, _kernelData.u8, - convertShapeToDims(_kernelShape), kernelOffset, _biasData.i32, convertShapeToDims(_biasShape), - _strideWidth, _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, - paddingHeight, outputOffset, output_multiplier, output_shift, output_activation_min, - output_activation_max, _outputData.u8, convertShapeToDims(_outputShape), im2colDataToPass, - im2colDim, &gemm_context); + + nnfw::cker::ConvParams op_params; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = 1; + op_params.dilation_height_factor = 1; + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.input_offset = -_inputShape.offset; + op_params.weights_offset = -_kernelShape.offset; + op_params.output_offset = _outputShape.offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + nnfw::cker::Conv(op_params, convertShapeToCkerShape(_inputShape), _inputData.u8, + convertShapeToCkerShape(_kernelShape), _kernelData.u8, + convertShapeToCkerShape(_biasShape), 
_biasData.i32, + convertShapeToCkerShape(_outputShape), _outputData.u8); + return true; } diff --git a/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc b/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc index db59fa801..5bcc6993b 100644 --- a/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc +++ b/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc @@ -51,6 +51,29 @@ uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx) return shape.dimensions[dimensionIdx]; } +bool QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift) +{ + if (double_multiplier == 0.) + { + *quantized_multiplier = 0; + *shift = 0; + return true; + } + const double q = std::frexp(double_multiplier, shift); + auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31))); + + assert(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) + { + q_fixed /= 2; + ++*shift; + } + assert(q_fixed <= std::numeric_limits<int32_t>::max()); + *quantized_multiplier = static_cast<int32_t>(q_fixed); + + return true; +} + bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int32_t *right_shift) { diff --git a/runtimes/neurun/backend/cpu/kernel/OperationUtils.h b/runtimes/neurun/backend/cpu/kernel/OperationUtils.h index 6a3c54378..95bc16db5 100644 --- a/runtimes/neurun/backend/cpu/kernel/OperationUtils.h +++ b/runtimes/neurun/backend/cpu/kernel/OperationUtils.h @@ -175,6 +175,8 @@ inline TfLiteFusedActivation convertFusedActivation(FuseCode act) return kTfLiteActNone; } +bool QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift); + __wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int32_t *right_shift); |