diff options
author | 오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com> | 2019-03-28 15:30:23 +0900 |
---|---|---|
committer | GitHub Enterprise <noreply-CODE@samsung.com> | 2019-03-28 15:30:23 +0900 |
commit | dee9a3d3c61c464241db6f6858609ca772759ad7 (patch) | |
tree | 747017e967818fb60c8510000a09bbaf1ff74fd1 | |
parent | b425807196464b75b4c2ad676e23cc65216f0bfe (diff) | |
download | nnfw-dee9a3d3c61c464241db6f6858609ca772759ad7.tar.gz nnfw-dee9a3d3c61c464241db6f6858609ca772759ad7.tar.bz2 nnfw-dee9a3d3c61c464241db6f6858609ca772759ad7.zip |
Introduce concat kernel (#4887)
Introduce concat kernel from tflite
Use concat kernel in cpu backend
Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
-rw-r--r-- | libs/cker/include/cker/Shape.h | 10 | ||||
-rw-r--r-- | libs/cker/include/cker/operation/Concatenation.h | 92 | ||||
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/CMakeLists.txt | 2 | ||||
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/ConcatLayer.cc | 29 | ||||
-rw-r--r-- | runtimes/neurun/backend/cpu/kernel/OperationUtils.h | 22 |
5 files changed, 139 insertions, 16 deletions
diff --git a/libs/cker/include/cker/Shape.h b/libs/cker/include/cker/Shape.h index b6f0602be..d4f54350b 100644 --- a/libs/cker/include/cker/Shape.h +++ b/libs/cker/include/cker/Shape.h @@ -205,6 +205,16 @@ private: }; }; +inline int MatchingDim(const Shape &shape1, int index1, const Shape &shape2, int index2) +{ + UNUSED_RELEASE(shape2); + UNUSED_RELEASE(index2); + assert(shape1.Dims(index1) == shape2.Dims(index2)); + return shape1.Dims(index1); +} + +inline Shape GetShape(std::vector<int32_t> data) { return Shape(data.size(), data.data()); } + } // namespace cker } // namespace nnfw diff --git a/libs/cker/include/cker/operation/Concatenation.h b/libs/cker/include/cker/operation/Concatenation.h new file mode 100644 index 000000000..1fabb9db7 --- /dev/null +++ b/libs/cker/include/cker/operation/Concatenation.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_CKER_CONCATENATION_H_ +#define __NNFW_CKER_CONCATENATION_H_ + +#include <cstdint> + +#include "cker/Shape.h" + +namespace nnfw +{ +namespace cker +{ + +struct ConcatenationParams +{ + int8_t axis; + const int32_t *input_zeropoint; + const float *input_scale; + uint16_t inputs_count; + int32_t output_zeropoint; + float output_scale; +}; + +template <typename Scalar> +inline void Concatenation(const ConcatenationParams &params, const Shape *const *input_shapes, + const Scalar *const *input_data, const Shape &output_shape, + Scalar *output_data) +{ + int axis = params.axis; + int inputs_count = params.inputs_count; + const int concat_dimensions = output_shape.DimensionsCount(); + assert(axis < concat_dimensions); + + int64_t concat_size = 0; + for (int i = 0; i < inputs_count; i++) + { + assert(input_shapes[i]->DimensionsCount() == concat_dimensions); + for (int j = 0; j < concat_dimensions; j++) + { + if (j != axis) + { + MatchingDim(*input_shapes[i], j, output_shape, j); + } + } + concat_size += input_shapes[i]->Dims(axis); + } + assert(concat_size == output_shape.Dims(axis)); + int64_t outer_size = 1; + for (int i = 0; i < axis; ++i) + { + outer_size *= output_shape.Dims(i); + } + // For all input arrays, + // FlatSize() = outer_size * Dims(axis) * base_inner_size; + int64_t base_inner_size = 1; + for (int i = axis + 1; i < concat_dimensions; ++i) + { + base_inner_size *= output_shape.Dims(i); + } + + Scalar *output_ptr = output_data; + for (int k = 0; k < outer_size; k++) + { + for (int i = 0; i < inputs_count; ++i) + { + const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size; + memcpy(output_ptr, input_data[i] + k * copy_size, copy_size * sizeof(Scalar)); + output_ptr += copy_size; + } + } +} + +} // namespace cker +} // namespace nnfw + +#endif // __NNFW_CKER_CONCATENATION_H_ diff --git a/runtimes/neurun/backend/cpu/kernel/CMakeLists.txt b/runtimes/neurun/backend/cpu/kernel/CMakeLists.txt index 6617d627e..1f35ce975 100644 ---
a/runtimes/neurun/backend/cpu/kernel/CMakeLists.txt +++ b/runtimes/neurun/backend/cpu/kernel/CMakeLists.txt @@ -7,7 +7,7 @@ target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${CMAKE_SOURCE_DIR}/e target_link_libraries(${LIB_NEURUN_KERNEL_CPU} nnapi-header) target_link_libraries(${LIB_NEURUN_KERNEL_CPU} tensorflow-lite) -target_link_libraries(${LIB_NEURUN_KERNEL_CPU} nnfw_lib_misc) +target_link_libraries(${LIB_NEURUN_KERNEL_CPU} nnfw_lib_misc nnfw_lib_cker) set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES OUTPUT_NAME kernel_cpu) diff --git a/runtimes/neurun/backend/cpu/kernel/ConcatLayer.cc b/runtimes/neurun/backend/cpu/kernel/ConcatLayer.cc index dbe15fca6..16fe4eb61 100644 --- a/runtimes/neurun/backend/cpu/kernel/ConcatLayer.cc +++ b/runtimes/neurun/backend/cpu/kernel/ConcatLayer.cc @@ -17,9 +17,10 @@ #include "ConcatLayer.h" -#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" #include "OperationUtils.h" +#include <cker/operation/Concatenation.h> + namespace neurun { namespace backend @@ -40,18 +41,18 @@ bool ConcatLayer::concatenationFloat32() { uint32_t num_inputs = _inputShapes.size(); - tflite::ConcatenationParams op_params; + nnfw::cker::ConcatenationParams op_params; op_params.axis = _axis; op_params.inputs_count = num_inputs; - std::vector<::tflite::RuntimeShape *> inputDimsPtr; - std::vector<::tflite::RuntimeShape> inputDims; + std::vector<nnfw::cker::Shape *> inputDimsPtr; + std::vector<nnfw::cker::Shape> inputDims; inputDimsPtr.reserve(num_inputs); inputDims.reserve(num_inputs); for (uint32_t i = 0; i < num_inputs; i++) { - inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i])); + inputDims.push_back(convertShapeToCkerShape(_inputShapes[i])); inputDimsPtr.push_back(&inputDims[i]); } @@ -62,9 +63,8 @@ bool ConcatLayer::concatenationFloat32() inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr)); } - 
::tflite::optimized_ops::Concatenation<float>( - op_params, inputDimsPtr.data(), inputFloatPtrs.data(), - convertShapeToTFLiteShape(_outputShape), _outputData.f); + nnfw::cker::Concatenation<float>(op_params, inputDimsPtr.data(), inputFloatPtrs.data(), + convertShapeToCkerShape(_outputShape), _outputData.f); return true; } bool ConcatLayer::concatenationQuant8() @@ -79,7 +79,7 @@ bool ConcatLayer::concatenationQuant8() input_scales[i] = _inputShapes[i].scale; } - tflite::ConcatenationParams op_params; + nnfw::cker::ConcatenationParams op_params; op_params.axis = _axis; op_params.inputs_count = num_inputs; op_params.input_zeropoint = input_zeropoints.data(); @@ -87,19 +87,18 @@ bool ConcatLayer::concatenationQuant8() op_params.output_zeropoint = _outputShape.offset; op_params.output_scale = _outputShape.scale; - std::vector<::tflite::RuntimeShape *> inputDimsPtr; - std::vector<::tflite::RuntimeShape> inputDims; + std::vector<nnfw::cker::Shape *> inputDimsPtr; + std::vector<nnfw::cker::Shape> inputDims; inputDimsPtr.reserve(num_inputs); inputDims.reserve(num_inputs); for (uint32_t i = 0; i < num_inputs; i++) { - inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i])); + inputDims.push_back(convertShapeToCkerShape(_inputShapes[i])); inputDimsPtr.push_back(&inputDims[i]); } - ::tflite::optimized_ops::Concatenation<uint8_t>( - op_params, inputDimsPtr.data(), _inputDataPtrs.data(), - convertShapeToTFLiteShape(_outputShape), _outputData.u8); + nnfw::cker::Concatenation<uint8_t>(op_params, inputDimsPtr.data(), _inputDataPtrs.data(), + convertShapeToCkerShape(_outputShape), _outputData.u8); return true; } diff --git a/runtimes/neurun/backend/cpu/kernel/OperationUtils.h b/runtimes/neurun/backend/cpu/kernel/OperationUtils.h index 0b0649ecd..f38cec072 100644 --- a/runtimes/neurun/backend/cpu/kernel/OperationUtils.h +++ b/runtimes/neurun/backend/cpu/kernel/OperationUtils.h @@ -23,6 +23,8 @@ #include <limits> #include <vector> +#include <cker/Shape.h> + #include 
"tensorflow/contrib/lite/c/builtin_op_data.h" #include "tensorflow/contrib/lite/kernels/internal/types.h" #include "tensorflow/contrib/lite/kernels/internal/tensor.h" @@ -107,6 +109,26 @@ inline ::tflite::RuntimeShape convertShapeToTFLiteShape(const Shape &shape) return ::tflite::GetTensorShape(raw_shape); } +inline nnfw::cker::Shape convertShapeToCkerShape(const Shape &shape) +{ + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i >= shape.dimensions.size()) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = shape.dimensions[i]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + inline TfLiteFusedActivation convertFusedActivation(FuseCode act) { if (act == ANEURALNETWORKS_FUSED_NONE) |