diff options
author | 오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com> | 2019-03-28 15:30:23 +0900 |
---|---|---|
committer | GitHub Enterprise <noreply-CODE@samsung.com> | 2019-03-28 15:30:23 +0900 |
commit | dee9a3d3c61c464241db6f6858609ca772759ad7 (patch) | |
tree | 747017e967818fb60c8510000a09bbaf1ff74fd1 | |
parent | b425807196464b75b4c2ad676e23cc65216f0bfe (diff) | |
download | nnfw-dee9a3d3c61c464241db6f6858609ca772759ad7.tar.gz nnfw-dee9a3d3c61c464241db6f6858609ca772759ad7.tar.bz2 nnfw-dee9a3d3c61c464241db6f6858609ca772759ad7.zip |
Introduce concat kernel (#4887)
Introduce concat kernel from tflite
Use concat kernel in cpu backend
Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
-rw-r--r-- | libs/cker/include/cker/Shape.h | 10 | ||||
-rw-r--r-- | libs/cker/include/cker/operation/Concatenation.h | 92 | ||||
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/CMakeLists.txt | 2 | ||||
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/ConcatLayer.cc | 29 | ||||
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/OperationUtils.h | 22 |
5 files changed, 139 insertions, 16 deletions
diff --git a/libs/cker/include/cker/Shape.h b/libs/cker/include/cker/Shape.h index b6f0602be..d4f54350b 100644 --- a/libs/cker/include/cker/Shape.h +++ b/libs/cker/include/cker/Shape.h @@ -205,6 +205,16 @@ private: }; }; +inline int MatchingDim(const Shape &shape1, int index1, const Shape &shape2, int index2) +{ + UNUSED_RELEASE(shape2); + UNUSED_RELEASE(index2); + assert(shape1.Dims(index1) == shape2.Dims(index2)); + return shape1.Dims(index1); +} + +inline Shape GetShape(std::vector<int32_t> data) { return Shape(data.size(), data.data()); } + } // namespace cker } // namespace nnfw diff --git a/libs/cker/include/cker/operation/Concatenation.h b/libs/cker/include/cker/operation/Concatenation.h new file mode 100644 index 000000000..1fabb9db7 --- /dev/null +++ b/libs/cker/include/cker/operation/Concatenation.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_CKER_CONCATENATION_H_ +#define __NNFW_CKER_CONCATENATION_H_ + +#include <cstdint> + +#include "cker/Shape.h" + +namespace nnfw +{ +namespace cker +{ + +struct ConcatenationParams +{ + int8_t axis; + const int32_t *input_zeropoint; + const float *input_scale; + uint16_t inputs_count; + int32_t output_zeropoint; + float output_scale; +}; + +template <typename Scalar> +inline void Concatenation(const ConcatenationParams &params, const Shape *const *input_shapes, + const Scalar *const *input_data, const Shape &output_shape, + Scalar *output_data) +{ + int axis = params.axis; + int inputs_count = params.inputs_count; + const int concat_dimensions = output_shape.DimensionsCount(); + assert(axis < concat_dimensions); + + int64_t concat_size = 0; + for (int i = 0; i < inputs_count; i++) + { + assert(input_shapes[i]->DimensionsCount() == concat_dimensions); + for (int j = 0; j < concat_dimensions; j++) + { + if (j != axis) + { + MatchingDim(*input_shapes[i], j, output_shape, j); + } + } + concat_size += input_shapes[i]->Dims(axis); + } + assert(concat_size == output_shape.Dims(axis)); + int64_t outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= output_shape.Dims(i); + } + // For all input arrays, + // FlatSize() = outer_size * Dims(axis) * base_inner_size; + int64_t base_inner_size = 1; + for (int i = axis + 1; i < concat_dimensions; ++i) + { + base_inner_size *= output_shape.Dims(i); + } + + Scalar *output_ptr = output_data; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < inputs_count; ++i) + { + const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size; + memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar)); + output_ptr += copy_size; + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_CONCATENATION_H_ diff --git a/runtimes/neurun/backend/cpu/kernel/CMakeLists.txt b/runtimes/neurun/backend/cpu/kernel/CMakeLists.txt index 6617d627e..1f35ce975 100644 ---
a/runtimes/neurun/backend/cpu/kernel/CMakeLists.txt +++ b/runtimes/neurun/backend/cpu/kernel/CMakeLists.txt @@ -7,7 +7,7 @@ target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${CMAKE_SOURCE_DIR}/e target_link_libraries(${LIB_NEURUN_KERNEL_CPU} nnapi-header) target_link_libraries(${LIB_NEURUN_KERNEL_CPU} tensorflow-lite) -target_link_libraries(${LIB_NEURUN_KERNEL_CPU} nnfw_lib_misc) +target_link_libraries(${LIB_NEURUN_KERNEL_CPU} nnfw_lib_misc nnfw_lib_cker) set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES OUTPUT_NAME kernel_cpu) diff --git a/runtimes/neurun/backend/cpu/kernel/ConcatLayer.cc b/runtimes/neurun/backend/cpu/kernel/ConcatLayer.cc index dbe15fca6..16fe4eb61 100644 --- a/runtimes/neurun/backend/cpu/kernel/ConcatLayer.cc +++ b/runtimes/neurun/backend/cpu/kernel/ConcatLayer.cc @@ -17,9 +17,10 @@ #include "ConcatLayer.h" -#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" #include "OperationUtils.h" +#include <cker/operation/Concatenation.h> + namespace neurun { namespace backend @@ -40,18 +41,18 @@ bool ConcatLayer::concatenationFloat32() { uint32_t num_inputs = _inputShapes.size(); - tflite::ConcatenationParams op_params; + nnfw::cker::ConcatenationParams op_params; op_params.axis = _axis; op_params.inputs_count = num_inputs; - std::vector<::tflite::RuntimeShape *> inputDimsPtr; - std::vector<::tflite::RuntimeShape> inputDims; + std::vector<nnfw::cker::Shape *> inputDimsPtr; + std::vector<nnfw::cker::Shape> inputDims; inputDimsPtr.reserve(num_inputs); inputDims.reserve(num_inputs); for (uint32_t i = 0; i < num_inputs; i++) { - inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i])); + inputDims.push_back(convertShapeToCkerShape(_inputShapes[i])); inputDimsPtr.push_back(&inputDims[i]); } @@ -62,9 +63,8 @@ bool ConcatLayer::concatenationFloat32() inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr)); } - 
::tflite::optimized_ops::Concatenation<float>( - op_params, inputDimsPtr.data(), inputFloatPtrs.data(), - convertShapeToTFLiteShape(_outputShape), _outputData.f); + nnfw::cker::Concatenation<float>(op_params, inputDimsPtr.data(), inputFloatPtrs.data(), + convertShapeToCkerShape(_outputShape), _outputData.f); return true; } bool ConcatLayer::concatenationQuant8() @@ -79,7 +79,7 @@ bool ConcatLayer::concatenationQuant8() input_scales[i] = _inputShapes[i].scale; } - tflite::ConcatenationParams op_params; + nnfw::cker::ConcatenationParams op_params; op_params.axis = _axis; op_params.inputs_count = num_inputs; op_params.input_zeropoint = input_zeropoints.data(); @@ -87,19 +87,18 @@ bool ConcatLayer::concatenationQuant8() op_params.output_zeropoint = _outputShape.offset; op_params.output_scale = _outputShape.scale; - std::vector<::tflite::RuntimeShape *> inputDimsPtr; - std::vector<::tflite::RuntimeShape> inputDims; + std::vector<nnfw::cker::Shape *> inputDimsPtr; + std::vector<nnfw::cker::Shape> inputDims; inputDimsPtr.reserve(num_inputs); inputDims.reserve(num_inputs); for (uint32_t i = 0; i < num_inputs; i++) { - inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i])); + inputDims.push_back(convertShapeToCkerShape(_inputShapes[i])); inputDimsPtr.push_back(&inputDims[i]); } - ::tflite::optimized_ops::Concatenation<uint8_t>( - op_params, inputDimsPtr.data(), _inputDataPtrs.data(), - convertShapeToTFLiteShape(_outputShape), _outputData.u8); + nnfw::cker::Concatenation<uint8_t>(op_params, inputDimsPtr.data(), _inputDataPtrs.data(), + convertShapeToCkerShape(_outputShape), _outputData.u8); return true; } diff --git a/runtimes/neurun/backend/cpu/kernel/OperationUtils.h b/runtimes/neurun/backend/cpu/kernel/OperationUtils.h index 0b0649ecd..f38cec072 100644 --- a/runtimes/neurun/backend/cpu/kernel/OperationUtils.h +++ b/runtimes/neurun/backend/cpu/kernel/OperationUtils.h @@ -23,6 +23,8 @@ #include <limits> #include <vector> +#include <cker/Shape.h> + #include 
"tensorflow/contrib/lite/c/builtin_op_data.h" #include "tensorflow/contrib/lite/kernels/internal/types.h" #include "tensorflow/contrib/lite/kernels/internal/tensor.h" @@ -107,6 +109,26 @@ inline ::tflite::RuntimeShape convertShapeToTFLiteShape(const Shape &shape) return ::tflite::GetTensorShape(raw_shape); } +inline nnfw::cker::Shape convertShapeToCkerShape(const Shape &shape) +{ + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i >= shape.dimensions.size()) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = shape.dimensions[i]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + inline TfLiteFusedActivation convertFusedActivation(FuseCode act) { if (act == ANEURALNETWORKS_FUSED_NONE) |