author     Hyeongseok Oh / On-Device Lab (SR) / Staff Engineer / Samsung Electronics <hseok82.oh@samsung.com>  2019-04-01 19:08:26 +0900
committer  GitHub Enterprise <noreply-CODE@samsung.com>  2019-04-01 19:08:26 +0900
commit     2ade35e42320121d583e907d4737fe29a903f6c8 (patch)
tree       1cff9ffec2d42ecb967f12e753cfb92fd5711820 /runtimes
parent     ae077ed17e17f5204def511792d81b9c2c603853 (diff)
download   nnfw-2ade35e42320121d583e907d4737fe29a903f6c8.tar.gz
           nnfw-2ade35e42320121d583e907d4737fe29a903f6c8.tar.bz2
           nnfw-2ade35e42320121d583e907d4737fe29a903f6c8.zip
Introduce cpu quant8 convolution kernel (#4910)
Introduce cpu quantized int8 convolution kernel from tflite and gemmlowp. Use kernel in neurun cpu backend.
Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
Diffstat (limited to 'runtimes')
-rw-r--r--  runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc  125
-rw-r--r--  runtimes/neurun/backend/cpu/kernel/OperationUtils.cc     23
-rw-r--r--  runtimes/neurun/backend/cpu/kernel/OperationUtils.h       2
3 files changed, 51 insertions(+), 99 deletions(-)
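
The change drops the legacy tflite optimized_ops::Conv path (static im2col scratch buffer, gemmlowp context, execution mutex) and calls nnfw::cker::Conv directly. As a reading aid for the diff below, here is a condensed, commented sketch of what the new convQuant8() does. It relies on the cker header and the OperationUtils helpers shown in the hunks that follow and is not meant as a drop-in replacement for the committed code:

#include <cker/operation/Conv.h>
#include "OperationUtils.h"

bool ConvolutionLayer::convQuant8()
{
  // 1. Clamp bounds for the fused activation, expressed in the output's quantized domain.
  int32_t output_activation_min = 0;
  int32_t output_activation_max = 0;
  CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
                                &output_activation_max);

  // 2. Fold the input/filter/output scales into a fixed-point multiplier and shift.
  //    QuantizeMultiplier (added in OperationUtils.cc below) replaces
  //    QuantizeMultiplierSmallerThanOne and also handles multipliers >= 1.
  float real_multiplier = 0.0;
  int32_t output_multiplier = 0;
  int32_t output_shift = 0;
  if (!GetQuantizedConvolutionMultipler(_inputShape, _kernelShape, _biasShape, _outputShape,
                                        &real_multiplier) ||
      !QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift))
  {
    return false;
  }

  // 3. Pack strides, padding, zero-point offsets and the requantization parameters
  //    into cker's ConvParams, then run the kernel. The caller no longer manages
  //    an im2col scratch buffer or a gemmlowp context.
  nnfw::cker::ConvParams op_params;
  op_params.stride_width = _strideWidth;
  op_params.stride_height = _strideHeight;
  op_params.dilation_width_factor = 1;
  op_params.dilation_height_factor = 1;
  op_params.padding_values.width = _paddingLeft;
  op_params.padding_values.height = _paddingTop;
  op_params.input_offset = -_inputShape.offset;    // negated zero points, as the kernel expects
  op_params.weights_offset = -_kernelShape.offset;
  op_params.output_offset = _outputShape.offset;
  op_params.output_multiplier = output_multiplier;
  op_params.output_shift = output_shift;
  op_params.quantized_activation_min = output_activation_min;
  op_params.quantized_activation_max = output_activation_max;

  nnfw::cker::Conv(op_params, convertShapeToCkerShape(_inputShape), _inputData.u8,
                   convertShapeToCkerShape(_kernelShape), _kernelData.u8,
                   convertShapeToCkerShape(_biasShape), _biasData.i32,
                   convertShapeToCkerShape(_outputShape), _outputData.u8);

  return true;
}
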
diff --git a/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc b/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc
index 675e05e9e..672ff587f 100644
--- a/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc
+++ b/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc
@@ -18,12 +18,8 @@
#include <cker/operation/Conv.h>
-// TODO : Discard legacy methods
-#include "tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h"
#include "OperationUtils.h"
-#include <mutex>
-
namespace neurun
{
namespace backend
@@ -32,64 +28,6 @@ namespace cpu
{
namespace kernel
{
-
-// If possible we will use this static buffer for the tensor.
-static constexpr int kStaticBufferSize = 1605632;
-static char static_scratch_buffer[kStaticBufferSize];
-static std::mutex executionMutex;
-
-#define ANDROID_NN_CONV_PARAMETERS(Type) \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
- uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft; \
- \
- tflite::Dims<4> im2colDim; \
- im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \
- im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \
- im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \
- im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \
- \
- im2colDim.strides[0] = 1; \
- for (int i = 1; i < 4; i++) \
- { \
- im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \
- } \
- \
- Type *im2colData = nullptr; \
- uint64_t im2colByteSize = sizeof(Type); \
- std::unique_ptr<Type[]> im2colGuard; \
- for (int i = 0; i < 4; i++) \
- { \
- im2colByteSize *= im2colDim.sizes[i]; \
- } \
- /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
- if (im2colByteSize >= 0x7fffffff) \
- { \
- std::cout << "Conv size is too large, not enough memory" << std::endl; \
- return false; \
- } \
- if (im2colByteSize <= kStaticBufferSize) \
- { \
- im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
- } \
- else \
- { \
- im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
- if (im2colData == nullptr) \
- { \
- std::cout << "Conv size is too large, not enough memory" << std::endl; \
- return false; \
- } \
- im2colGuard.reset(im2colData); \
- }
-
ConvolutionLayer::ConvolutionLayer()
: _inputData(), _kernelData(), _outputData(), _biasData(), _inputShape(), _kernelShape(),
_outputShape(), _biasShape(), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
@@ -123,52 +61,41 @@ bool ConvolutionLayer::convFloat32()
bool ConvolutionLayer::convQuant8()
{
- ANDROID_NN_CONV_PARAMETERS(uint8_t)
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+ &output_activation_max);
- int32_t inputOffset = -_inputShape.offset;
- int32_t kernelOffset = -_kernelShape.offset;
- int32_t outputOffset = _outputShape.offset;
float real_multiplier = 0.0;
int32_t output_multiplier = 0;
int32_t output_shift = 0;
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
-
- const ::tflite::Dims<4> &kernel_dim = convertShapeToDims(_kernelShape);
- const int kernel_width = ArraySize(kernel_dim, 1);
- const int kernel_height = ArraySize(kernel_dim, 2);
- const bool need_im2col =
- _strideWidth != 1 || _strideHeight != 1 || kernel_width != 1 || kernel_height != 1;
-
- uint8_t *im2colDataToPass = nullptr;
- if (need_im2col)
- {
- im2colDataToPass = im2colData;
- }
-
if (!GetQuantizedConvolutionMultipler(_inputShape, _kernelShape, _biasShape, _outputShape,
&real_multiplier) ||
- !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
+ !QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift))
{
return false;
}
- CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
- &output_activation_max);
- int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
-
- static gemmlowp::GemmContext gemm_context;
- // Prevent concurrent executions that may access the scratch buffer and
- // gemm_context.
- std::unique_lock<std::mutex> lock(executionMutex);
- // Alow gemmlowp automatically decide how many threads to use.
- gemm_context.set_max_num_threads(0);
- tflite::optimized_ops::Conv(
- _inputData.u8, convertShapeToDims(_inputShape), inputOffset, _kernelData.u8,
- convertShapeToDims(_kernelShape), kernelOffset, _biasData.i32, convertShapeToDims(_biasShape),
- _strideWidth, _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth,
- paddingHeight, outputOffset, output_multiplier, output_shift, output_activation_min,
- output_activation_max, _outputData.u8, convertShapeToDims(_outputShape), im2colDataToPass,
- im2colDim, &gemm_context);
+
+ nnfw::cker::ConvParams op_params;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = 1;
+ op_params.dilation_height_factor = 1;
+ op_params.padding_values.width = _paddingLeft;
+ op_params.padding_values.height = _paddingTop;
+ op_params.input_offset = -_inputShape.offset;
+ op_params.weights_offset = -_kernelShape.offset;
+ op_params.output_offset = _outputShape.offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
+ nnfw::cker::Conv(op_params, convertShapeToCkerShape(_inputShape), _inputData.u8,
+ convertShapeToCkerShape(_kernelShape), _kernelData.u8,
+ convertShapeToCkerShape(_biasShape), _biasData.i32,
+ convertShapeToCkerShape(_outputShape), _outputData.u8);
+
return true;
}
diff --git a/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc b/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc
index db59fa801..5bcc6993b 100644
--- a/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc
+++ b/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc
@@ -51,6 +51,29 @@ uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx)
return shape.dimensions[dimensionIdx];
}
+bool QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+ if (double_multiplier == 0.)
+ {
+ *quantized_multiplier = 0;
+ *shift = 0;
+ return true;
+ }
+ const double q = std::frexp(double_multiplier, shift);
+ auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+
+ assert(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31))
+ {
+ q_fixed /= 2;
+ ++*shift;
+ }
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+
+ return true;
+}
+
bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
int32_t *right_shift)
{
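
The new QuantizeMultiplier above follows the usual tflite scheme: std::frexp splits the real multiplier into a mantissa in [0.5, 1) and a power-of-two exponent, the mantissa is rounded to a Q0.31 fixed-point value, and the resulting pair satisfies double_multiplier ~= quantized_multiplier * 2^(shift - 31). Unlike QuantizeMultiplierSmallerThanOne, it therefore also accepts multipliers of 1.0 or more (positive shift). A small self-contained check of that identity, independent of the neurun types (quantize_multiplier below is just a local copy for the demo):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

// Local copy of the decomposition used by QuantizeMultiplier:
// double_multiplier ~= quantized_multiplier * 2^(shift - 31)
static void quantize_multiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (double_multiplier == 0.)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(double_multiplier, shift); // q in [0.5, 1)
  auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  if (q_fixed == (1ll << 31)) // rounding pushed the mantissa up to exactly 1.0
  {
    q_fixed /= 2;
    ++*shift;
  }
  assert(q_fixed <= std::numeric_limits<int32_t>::max());
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

int main()
{
  // Typical conv multipliers (input_scale * filter_scale / output_scale) plus a few edge cases.
  for (double m : {0.0009765625, 0.5, 1.0, 2.75})
  {
    int32_t qm = 0;
    int shift = 0;
    quantize_multiplier(m, &qm, &shift);
    const double back = static_cast<double>(qm) * std::ldexp(1.0, shift - 31);
    std::printf("m=%.10f  multiplier=%d  shift=%d  reconstructed=%.10f\n", m, static_cast<int>(qm),
                shift, back);
  }
  return 0;
}
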
diff --git a/runtimes/neurun/backend/cpu/kernel/OperationUtils.h b/runtimes/neurun/backend/cpu/kernel/OperationUtils.h
index 6a3c54378..95bc16db5 100644
--- a/runtimes/neurun/backend/cpu/kernel/OperationUtils.h
+++ b/runtimes/neurun/backend/cpu/kernel/OperationUtils.h
@@ -175,6 +175,8 @@ inline TfLiteFusedActivation convertFusedActivation(FuseCode act)
return kTfLiteActNone;
}
+bool QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
+
__wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
int32_t *right_shift);
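
For context, the (output_multiplier, output_shift) pair produced by QuantizeMultiplier is what the quantized kernel uses to scale each int32 accumulator back into the uint8 output range, together with the output zero point and the activation clamp set in ConvParams. A deliberately simplified illustration of that step, using floating point instead of the exact integer-only fixed-point routines inside cker/gemmlowp:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Conceptual requantization of one accumulator value (bias and zero-point
// corrections are assumed to be already folded into `acc`). The real kernel
// performs this with saturating integer fixed-point arithmetic.
inline uint8_t requantize(int32_t acc, int32_t output_multiplier, int32_t output_shift,
                          int32_t output_offset, int32_t act_min, int32_t act_max)
{
  const double real_multiplier =
      static_cast<double>(output_multiplier) * std::ldexp(1.0, output_shift - 31);
  int32_t out = static_cast<int32_t>(std::lround(acc * real_multiplier)) + output_offset;
  out = std::min(std::max(out, act_min), act_max); // quantized_activation_min/max
  return static_cast<uint8_t>(out);
}
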