diff options
author | 오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com> | 2019-04-01 19:08:26 +0900 |
---|---|---|
committer | GitHub Enterprise <noreply-CODE@samsung.com> | 2019-04-01 19:08:26 +0900 |
commit | 2ade35e42320121d583e907d4737fe29a903f6c8 (patch) | |
tree | 1cff9ffec2d42ecb967f12e753cfb92fd5711820 /runtimes | |
parent | ae077ed17e17f5204def511792d81b9c2c603853 (diff) | |
download | nnfw-2ade35e42320121d583e907d4737fe29a903f6c8.tar.gz nnfw-2ade35e42320121d583e907d4737fe29a903f6c8.tar.bz2 nnfw-2ade35e42320121d583e907d4737fe29a903f6c8.zip |
Introduce cpu quant8 convolution kernel (#4910)
Introduce cpu quantized uint8 convolution kernel from tflite and gemmlowp
Use kernel in neurun cpu backend
Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
Diffstat (limited to 'runtimes')
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc | 125 | ||||
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/OperationUtils.cc | 23 | ||||
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/OperationUtils.h | 2 |
3 files changed, 51 insertions, 99 deletions
diff --git a/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc b/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc index 675e05e9e..672ff587f 100644 --- a/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc +++ b/runtimes/neurun/backend/cpu/kernel/ConvolutionLayer.cc @@ -18,12 +18,8 @@ #include <cker/operation/Conv.h> -// TODO : Discard legacy methods -#include "tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h" #include "OperationUtils.h" -#include <mutex> - namespace neurun { namespace backend @@ -32,64 +28,6 @@ namespace cpu { namespace kernel { - -// If possible we will use this static buffer for the tensor. -static constexpr int kStaticBufferSize = 1605632; -static char static_scratch_buffer[kStaticBufferSize]; -static std::mutex executionMutex; - -#define ANDROID_NN_CONV_PARAMETERS(Type) \ - uint32_t height = getSizeOfDimension(_inputShape, 1); \ - uint32_t width = getSizeOfDimension(_inputShape, 2); \ - uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \ - uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \ - uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ - uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ - uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \ - \ - uint32_t paddingHeight = (uint32_t)_paddingTop; \ - uint32_t paddingWidth = (uint32_t)_paddingLeft; \ - \ - tflite::Dims<4> im2colDim; \ - im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \ - im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \ - im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \ - im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \ - \ - im2colDim.strides[0] = 1; \ - for (int i = 1; i < 4; i++) \ - { \ - im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \ - } \ - \ - Type *im2colData = nullptr; \ - uint64_t im2colByteSize = sizeof(Type); \ - std::unique_ptr<Type[]> im2colGuard; \ - for (int i = 0; i < 4; i++) \ - { \ - 
im2colByteSize *= im2colDim.sizes[i]; \ - } \ - /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \ - if (im2colByteSize >= 0x7fffffff) \ - { \ - std::cout << "Conv size is too large, not enough memory" << std::endl; \ - return false; \ - } \ - if (im2colByteSize <= kStaticBufferSize) \ - { \ - im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ - } \ - else \ - { \ - im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ - if (im2colData == nullptr) \ - { \ - std::cout << "Conv size is too large, not enough memory" << std::endl; \ - return false; \ - } \ - im2colGuard.reset(im2colData); \ - } - ConvolutionLayer::ConvolutionLayer() : _inputData(), _kernelData(), _outputData(), _biasData(), _inputShape(), _kernelShape(), _outputShape(), _biasShape(), _paddingLeft(0), _paddingTop(0), _paddingRight(0), @@ -123,52 +61,41 @@ bool ConvolutionLayer::convFloat32() bool ConvolutionLayer::convQuant8() { - ANDROID_NN_CONV_PARAMETERS(uint8_t) + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, + &output_activation_max); - int32_t inputOffset = -_inputShape.offset; - int32_t kernelOffset = -_kernelShape.offset; - int32_t outputOffset = _outputShape.offset; float real_multiplier = 0.0; int32_t output_multiplier = 0; int32_t output_shift = 0; - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - - const ::tflite::Dims<4> &kernel_dim = convertShapeToDims(_kernelShape); - const int kernel_width = ArraySize(kernel_dim, 1); - const int kernel_height = ArraySize(kernel_dim, 2); - const bool need_im2col = - _strideWidth != 1 || _strideHeight != 1 || kernel_width != 1 || kernel_height != 1; - - uint8_t *im2colDataToPass = nullptr; - if (need_im2col) - { - im2colDataToPass = im2colData; - } - if (!GetQuantizedConvolutionMultipler(_inputShape, _kernelShape, _biasShape, _outputShape, &real_multiplier) || - 
!QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift)) + !QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift)) { return false; } - CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, - &output_activation_max); - int32_t dilationWidthFactor = 1, dilationHeightFactor = 1; - - static gemmlowp::GemmContext gemm_context; - // Prevent concurrent executions that may access the scratch buffer and - // gemm_context. - std::unique_lock<std::mutex> lock(executionMutex); - // Alow gemmlowp automatically decide how many threads to use. - gemm_context.set_max_num_threads(0); - tflite::optimized_ops::Conv( - _inputData.u8, convertShapeToDims(_inputShape), inputOffset, _kernelData.u8, - convertShapeToDims(_kernelShape), kernelOffset, _biasData.i32, convertShapeToDims(_biasShape), - _strideWidth, _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, - paddingHeight, outputOffset, output_multiplier, output_shift, output_activation_min, - output_activation_max, _outputData.u8, convertShapeToDims(_outputShape), im2colDataToPass, - im2colDim, &gemm_context); + + nnfw::cker::ConvParams op_params; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = 1; + op_params.dilation_height_factor = 1; + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.input_offset = -_inputShape.offset; + op_params.weights_offset = -_kernelShape.offset; + op_params.output_offset = _outputShape.offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + nnfw::cker::Conv(op_params, convertShapeToCkerShape(_inputShape), _inputData.u8, + convertShapeToCkerShape(_kernelShape), _kernelData.u8, + convertShapeToCkerShape(_biasShape), 
_biasData.i32, + convertShapeToCkerShape(_outputShape), _outputData.u8); + return true; } diff --git a/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc b/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc index db59fa801..5bcc6993b 100644 --- a/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc +++ b/runtimes/neurun/backend/cpu/kernel/OperationUtils.cc @@ -51,6 +51,29 @@ uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx) return shape.dimensions[dimensionIdx]; } +bool QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift) +{ + if (double_multiplier == 0.) + { + *quantized_multiplier = 0; + *shift = 0; + return true; + } + const double q = std::frexp(double_multiplier, shift); + auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31))); + + assert(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) + { + q_fixed /= 2; + ++*shift; + } + assert(q_fixed <= std::numeric_limits<int32_t>::max()); + *quantized_multiplier = static_cast<int32_t>(q_fixed); + + return true; +} + bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int32_t *right_shift) { diff --git a/runtimes/neurun/backend/cpu/kernel/OperationUtils.h b/runtimes/neurun/backend/cpu/kernel/OperationUtils.h index 6a3c54378..95bc16db5 100644 --- a/runtimes/neurun/backend/cpu/kernel/OperationUtils.h +++ b/runtimes/neurun/backend/cpu/kernel/OperationUtils.h @@ -175,6 +175,8 @@ inline TfLiteFusedActivation convertFusedActivation(FuseCode act) return kTfLiteActNone; } +bool QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift); + __wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int32_t *right_shift); |