diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2018-09-18 16:53:40 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2018-09-18 16:53:40 +0900 |
commit | 91f4ba45449f700a047a4aeea00b1a7c84e94c75 (patch) | |
tree | c60eecdba0861c51010fb0519f8a59668d90a6d2 /runtimes/neurun/src/kernel | |
parent | 07659ccd9fe7b1cf1547cc6cad78bcf489f0a361 (diff) | |
download | nnfw-91f4ba45449f700a047a4aeea00b1a7c84e94c75.tar.gz nnfw-91f4ba45449f700a047a4aeea00b1a7c84e94c75.tar.bz2 nnfw-91f4ba45449f700a047a4aeea00b1a7c84e94c75.zip |
Imported Upstream version 0.2upstream/0.2submit/tizen/20180918.075952
Diffstat (limited to 'runtimes/neurun/src/kernel')
29 files changed, 2588 insertions, 0 deletions
diff --git a/runtimes/neurun/src/kernel/CMakeLists.txt b/runtimes/neurun/src/kernel/CMakeLists.txt new file mode 100644 index 000000000..a39823102 --- /dev/null +++ b/runtimes/neurun/src/kernel/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(cpu) +add_subdirectory(acl_cl) diff --git a/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt b/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt new file mode 100644 index 000000000..857fe6fe6 --- /dev/null +++ b/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt @@ -0,0 +1,15 @@ +file(GLOB SOURCES "*.cc") + +add_library(${LIB_NEURUN_KERNEL_ACL_CL} STATIC ${SOURCES}) + +target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${NNFW_INCLUDE_DIR}) +target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${NEURUN_INCLUDE_DIR}) +target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow) # TODO We should not need this + +target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} arm_compute) +target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} tensorflow-lite) # TODO We should not need this +target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} ${LIB_NEURUN_KERNEL_CPU}) # TODO We should not need this + +set_target_properties(${LIB_NEURUN_KERNEL_ACL_CL} PROPERTIES POSITION_INDEPENDENT_CODE ON) +set_target_properties(${LIB_NEURUN_KERNEL_ACL_CL} PROPERTIES OUTPUT_NAME kernel_acl_cl) +install(TARGETS ${LIB_NEURUN_KERNEL_ACL_CL} DESTINATION lib/neurun) diff --git a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc new file mode 100644 index 000000000..b75ac90f0 --- /dev/null +++ b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConcatLayer.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +#include "backend/acl_cl/kernel/View.h" +#include "logging.h" + +namespace +{ + +bool matchSizeExceptAxis(const ::arm_compute::ICLTensor *t1, const ::arm_compute::ICLTensor *t2, + uint32_t axis) +{ + assert(t1->info()->num_dimensions() <= 4); + assert(t2->info()->num_dimensions() <= 4); + + for (uint32_t i = 0; i < 4; i++) + { + if (axis == i) + continue; + if (t1->info()->dimension(i) != t2->info()->dimension(i)) + return false; + } + return true; +} + +} // namespace {anonymous} + +namespace neurun +{ +namespace kernel +{ +namespace acl_cl +{ + +ConcatLayer::ConcatLayer() + : _input_allocs(), _output_alloc(nullptr), _axis(0), _input_type(OperandType::SCALAR_FLOAT32) +{ + // DO NOTHING +} + +bool ConcatLayer::concatenationFloat32() +{ + // Input and output size check + { + // NOTE Support only tensor with dimension 4 or less + + uint32_t axis_sum = 0; + + for (auto input : _input_allocs) + { + assert(matchSizeExceptAxis(_output_alloc, input, _axis)); + axis_sum += input->info()->dimension(_axis); + } + + assert(_output_alloc->info()->dimension(_axis) == axis_sum); + } + + VERBOSE(Concat_RUN) << "START Concat" << std::endl; + + // Perform operation + { + uint32_t axis_offset = 0; + + auto &queue = ::arm_compute::CLScheduler::get().queue(); + + _output_alloc->map(queue); + ::internal::arm_compute::kernel::View<float> output_view{_output_alloc}; + + for (auto input : _input_allocs) + { + input->map(queue); + const ::internal::arm_compute::kernel::View<float> 
input_reader{input}; + + for (uint32_t n = 0; n < input_reader.shape().N; n++) + { + for (uint32_t c = 0; c < input_reader.shape().C; c++) + { + for (uint32_t h = 0; h < input_reader.shape().H; h++) + { + for (uint32_t w = 0; w < input_reader.shape().W; w++) + { + uint32_t no = (_axis == 3) ? axis_offset : 0; + uint32_t co = (_axis == 2) ? axis_offset : 0; + uint32_t ho = (_axis == 1) ? axis_offset : 0; + uint32_t wo = (_axis == 0) ? axis_offset : 0; + output_view.at(n + no, c + co, h + ho, w + wo) = input_reader.at(n, c, h, w); + } + } + } + } + if (_axis == 3) + axis_offset += input_reader.shape().N; + if (_axis == 2) + axis_offset += input_reader.shape().C; + if (_axis == 1) + axis_offset += input_reader.shape().H; + if (_axis == 0) + axis_offset += input_reader.shape().W; + + input->unmap(queue); + } + _output_alloc->unmap(queue); + } + + VERBOSE(Concat_RUN) << "End Concat" << std::endl; + + return true; +} + +void ConcatLayer::configure(const std::vector<::arm_compute::ICLTensor *> &input_allocs, + int32_t axis, ::arm_compute::ICLTensor *output_alloc) +{ + _input_allocs = input_allocs; + _output_alloc = output_alloc; + + assert(axis < 4); + + // This map converts NHWC to NCHW(reversed) + // NHWC -> WHCN + static const uint32_t axis_map[] = {3, 1, 0, 2}; + _axis = axis_map[axis]; + + // TODO Support Quant8 + _input_type = OperandType::TENSOR_FLOAT32; +} + +void ConcatLayer::run() +{ + if (_input_type == OperandType::TENSOR_FLOAT32) + { + concatenationFloat32(); + } + else if (_input_type == OperandType::TENSOR_QUANT8_ASYMM) + { + throw std::runtime_error("NYI - concatenationQuant8()"); + } +} + +} // namespace acl_cl +} // namespace kernel +} // namespace neurun diff --git a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h new file mode 100644 index 000000000..4767721fa --- /dev/null +++ b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__ +#define __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__ + +#include <NeuralNetworks.h> + +#include <arm_compute/core/CL/ICLTensor.h> +#include <arm_compute/runtime/IFunction.h> + +#include "graph/operand/DataType.h" + +using OperandType = neurun::graph::operand::DataType; + +namespace neurun +{ +namespace kernel +{ +namespace acl_cl +{ + +// +// neurun::kernel::acl_cl::ConcatLayer +// A naive implementation of ConcatLayer for ACL +// + +class ConcatLayer : public ::arm_compute::IFunction +{ +public: + ConcatLayer(); + +public: + void configure(const std::vector<::arm_compute::ICLTensor *> &input_allocs, + int32_t axis /* NNAPI tensor axis from NHWC order */, + ::arm_compute::ICLTensor *output_alloc); + + void run(); + +private: + bool concatenationFloat32(); + +private: + std::vector<::arm_compute::ICLTensor *> _input_allocs; + ::arm_compute::ICLTensor *_output_alloc; + int32_t _axis; + OperandType _input_type; +}; + +} // namespace acl_cl +} // namespace kernel +} // namespace neurun + +#endif // __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__ diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc new file mode 100644 index 000000000..fa1d77579 --- /dev/null +++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc @@ -0,0 +1,94 @@ +/* + * 
Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE +// + +#if 0 + +#include "TensorConvertFromCommonLayer.h" + +#include "internal/nnapi/feature/Reader.h" +#include "backend/acl_cl/feature/View.h" + +#include <util/feature/IndexIterator.h> +#include <arm_compute/runtime/CL/CLScheduler.h> + +namespace neurun +{ +namespace kernel +{ +namespace acl_cl +{ + +bool TensorConvertFromCommonLayer::convert() +{ + auto inputBuffer = _inputTensor->buffer(); + auto inputSize = _inputTensor->info()->total_size(); + + auto &queue = ::arm_compute::CLScheduler::get().queue(); + + _outputTensor->map(queue); + + if (_tensorShape.rank() == 2) + { + const auto len = _tensorShape.dim(1); + + auto base = reinterpret_cast<const float *>(inputBuffer); + + for (int32_t n = 0; n < len; ++n) + { + auto from = base + n; + auto into = + reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n})); + + *into = *from; + } + } + else if (_tensorShape.rank() == 4) + { + auto featureShape = _tensorShape.asFeature(); + + const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize}; + ::internal::arm_compute::feature::View<float> into{_outputTensor}; + + ::nnfw::util::feature::iterate(featureShape) + << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { + const auto value = from.at(batch, ch, row, 
col); + into.at(batch, ch, row, col) = value; + }; + } + + _outputTensor->unmap(queue); +} + +void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor, + ::arm_compute::ICLTensor *outputTensor, + const ::neurun::graph::operand::Shape &tensorShape) +{ + _inputTensor = inputTensor; + _outputTensor = outputTensor; + _tensorShape = tensorShape; +} + +void TensorConvertFromCommonLayer::run() { convert(); } + +} // namespace acl_cl +} // namespace kernel +} // namespace neurun + +#endif diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h new file mode 100644 index 000000000..bd031a106 --- /dev/null +++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// +// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE +// + +#if 0 + +#ifndef __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__ +#define __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__ + +#include <NeuralNetworks.h> + +#include <arm_compute/runtime/IFunction.h> +#include <arm_compute/core/CL/ICLTensor.h> + +#include "internal/Model.h" +#include "internal/common/Tensor.h" + +namespace neurun +{ +namespace kernel +{ +namespace acl_cl +{ + +class TensorConvertFromCommonLayer : public ::arm_compute::IFunction +{ +public: + TensorConvertFromCommonLayer() {} + +public: + bool convert(); + + void configure(::internal::common::Tensor *inputTensor, ::arm_compute::ICLTensor *outputTensor, + const ::neurun::graph::operand::Shape &tensorShape); + + void run(); + +private: + ::internal::common::Tensor *_inputTensor; + ::arm_compute::ICLTensor *_outputTensor; + + ::neurun::graph::operand::Shape _tensorShape{1}; +}; + +} // namespace acl_cl +} // namespace kernel +} // namespace neurun + +#endif // __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__ + +#endif diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc new file mode 100644 index 000000000..985524bc3 --- /dev/null +++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE +// + +#if 0 + +#include "TensorConvertToCommonLayer.h" + +#include "backend/acl_cl/feature/View.h" +#include "internal/nnapi/feature/View.h" + +#include <util/feature/IndexIterator.h> +#include <arm_compute/runtime/CL/CLScheduler.h> + +namespace neurun +{ +namespace kernel +{ +namespace acl_cl +{ + +bool TensorConvertToCommonLayer::convert() +{ + auto outputBuffer = _outputTensor->buffer(); + auto outputSize = _outputTensor->info()->total_size(); + + auto &queue = ::arm_compute::CLScheduler::get().queue(); + + _inputTensor->map(queue); + + if (_tensorShape.rank() == 2) + { + const auto len = _tensorShape.dim(1); + + auto base = reinterpret_cast<float *>(outputBuffer); + + for (int32_t n = 0; n < len; ++n) + { + auto from = reinterpret_cast<const float *>( + _inputTensor->ptr_to_element(::arm_compute::Coordinates{n})); + auto into = base + n; + + *into = *from; + } + } + else if (_tensorShape.rank() == 4) + { + auto featureShape = _tensorShape.asFeature(); + + const ::internal::arm_compute::feature::View<float> from{_inputTensor}; + ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize}; + + ::nnfw::util::feature::iterate(featureShape) + << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { + const auto value = from.at(batch, ch, row, col); + into.at(batch, ch, row, col) = value; + }; + } + + _inputTensor->unmap(queue); +} + +void TensorConvertToCommonLayer::configure(::arm_compute::ICLTensor *inputTensor, + ::internal::common::Tensor *outputTensor, + const ::neurun::graph::operand::Shape &tensorShape) +{ + _inputTensor = inputTensor; + _outputTensor = outputTensor; + _tensorShape = tensorShape; +} + +void TensorConvertToCommonLayer::run() { convert(); } + +} // namespace acl_cl +} // namespace kernel +} // namespace neurun + +#endif diff --git 
a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h new file mode 100644 index 000000000..576f1ee71 --- /dev/null +++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE +// + +#if 0 + +#ifndef __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__ +#define __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__ + +#include <NeuralNetworks.h> + +#include <arm_compute/runtime/IFunction.h> +#include <arm_compute/core/CL/ICLTensor.h> + +#include "internal/Model.h" +#include "internal/common/Tensor.h" + +namespace neurun +{ +namespace kernel +{ +namespace acl_cl +{ + +class TensorConvertToCommonLayer : public ::arm_compute::IFunction +{ +public: + TensorConvertToCommonLayer() {} + +public: + bool convert(); + + void configure(::arm_compute::ICLTensor *inputTensor, ::internal::common::Tensor *outputTensor, + const ::neurun::graph::operand::Shape &tensorShape); + + void run(); + +private: + ::arm_compute::ICLTensor *_inputTensor; + ::internal::common::Tensor *_outputTensor; + + ::neurun::graph::operand::Shape _tensorShape{1}; +}; + +} // namespace acl_cl +} // namespace kernel +} // namespace neurun + +#endif // 
__INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__ + +#endif diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc new file mode 100644 index 000000000..2a6a84e10 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "AvgPoolLayer.h" + +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +#define AVGPOOLING_PARAMETERS \ + uint32_t height = getSizeOfDimension(_inputShape, 1); \ + uint32_t width = getSizeOfDimension(_inputShape, 2); \ + uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ + uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ + \ + uint32_t paddingHeight = (uint32_t)_paddingTop; \ + uint32_t paddingWidth = (uint32_t)_paddingLeft; + +AvgPoolLayer::AvgPoolLayer() + : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0), + _paddingTop(0), _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), + _kernelWidth(0), _kernelHeight(0), _activation(ANEURALNETWORKS_FUSED_NONE), + _inputType(OperandType::SCALAR_FLOAT32) +{ + // DO NOTHING +} + +bool AvgPoolLayer::averagePoolFloat32() +{ + + AVGPOOLING_PARAMETERS + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + + ::tflite::optimized_ops::AveragePool( + reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth, + _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight, + output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), + convertShapeToDims(_outputShape)); + return true; +} +bool AvgPoolLayer::averagePoolQuant8() +{ + + AVGPOOLING_PARAMETERS + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, + &output_activation_max); + + ::tflite::optimized_ops::AveragePool(_inputData, convertShapeToDims(_inputShape), _strideWidth, + _strideHeight, paddingWidth, paddingHeight, _kernelWidth, + _kernelHeight, output_activation_min, 
output_activation_max, + _outputData, convertShapeToDims(_outputShape)); + return true; +} + +void AvgPoolLayer::configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const FuseCode activation, + uint8_t *outputData, const Shape outputShape) +{ + _inputData = inputData; + _inputShape = inputShape; + _inputType = inputShape.type; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _kernelWidth = kernelWidth; + _kernelHeight = kernelHeight; + _activation = activation; + _outputData = outputData; + _outputShape = outputShape; +} + +void AvgPoolLayer::run() +{ + if (_inputType == OperandType::TENSOR_FLOAT32) + { + averagePoolFloat32(); + } + else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM) + { + throw std::runtime_error{"AvgPoolLayer : Not tested for TENSOR_QUANT8_ASYMM"}; + // averagePoolQuant8(); + } +} + +#undef AVGPOOLING_PARAMETERS + +} // namespace cpu +} // namespace kernel +} // namespace neurun diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h new file mode 100644 index 000000000..9f390a9e1 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_KERNEL_CPU_AVGPOOLLAYER_H__ +#define __NEURUN_KERNEL_CPU_AVGPOOLLAYER_H__ + +#include <NeuralNetworks.h> + +#include <arm_compute/runtime/IFunction.h> + +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +class AvgPoolLayer : public ::arm_compute::IFunction +{ +public: + AvgPoolLayer(); + +public: + bool averagePoolFloat32(); + + bool averagePoolQuant8(); + + void configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const FuseCode activation, uint8_t *outputData, + const Shape outputShape); + + void run(); + +private: + uint8_t *_inputData; + uint8_t *_outputData; + + Shape _inputShape; + Shape _outputShape; + + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + uint32_t _kernelWidth; + uint32_t _kernelHeight; + + FuseCode _activation; + + OperandType _inputType; +}; + +} // namespace cpu +} // namespace kernel +} // namespace neurun + +#endif // __NEURUN_KERNEL_CPU_AVGPOOLLAYER_H__ diff --git a/runtimes/neurun/src/kernel/cpu/CMakeLists.txt b/runtimes/neurun/src/kernel/cpu/CMakeLists.txt new file mode 100644 index 000000000..dddf154c3 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/CMakeLists.txt @@ -0,0 +1,14 @@ +file(GLOB SOURCES 
"*.cc") + +add_library(${LIB_NEURUN_KERNEL_CPU} STATIC ${SOURCES}) + +target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${NNFW_INCLUDE_DIR}) +target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${NEURUN_INCLUDE_DIR}) +target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow) + +target_link_libraries(${LIB_NEURUN_KERNEL_CPU} arm_compute) # TODO We should not need this +target_link_libraries(${LIB_NEURUN_KERNEL_CPU} tensorflow-lite) + +set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES POSITION_INDEPENDENT_CODE ON) +set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES OUTPUT_NAME kernel_cpu) +install(TARGETS ${LIB_NEURUN_KERNEL_CPU} DESTINATION lib/neurun) diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc new file mode 100644 index 000000000..5fe5e3993 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConcatLayer.h" + +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +ConcatLayer::ConcatLayer() + : _inputDataPtrs(), _outputData(nullptr), _axis(0), _inputShapes(), _outputShape(), + _inputType(OperandType::SCALAR_FLOAT32) +{ + // DO NOTHING +} + +bool ConcatLayer::concatenationFloat32() +{ + int num_inputs = _inputShapes.size(); + std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs); + std::vector<::tflite::Dims<4>> inputDims(num_inputs); + for (int i = 0; i < num_inputs; i++) + { + inputDims[i] = convertShapeToDims(_inputShapes[i]); + inputDimsPtr[i] = &inputDims[i]; + } + + std::vector<const float *> inputFloatPtrs; + + for (auto ptr : _inputDataPtrs) + { + inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr)); + } + + ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, float>( + getNumberOfDimensions(_outputShape) - _axis - 1, inputFloatPtrs.data(), inputDimsPtr.data(), + num_inputs, reinterpret_cast<float *>(_outputData), convertShapeToDims(_outputShape)); + return true; +} +bool ConcatLayer::concatenationQuant8() +{ + int num_inputs = _inputShapes.size(); + std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs); + std::vector<::tflite::Dims<4>> inputDims(num_inputs); + for (int i = 0; i < num_inputs; i++) + { + inputDims[i] = convertShapeToDims(_inputShapes[i]); + inputDimsPtr[i] = &inputDims[i]; + } + ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, uint8_t>( + getNumberOfDimensions(_outputShape) - _axis - 1, _inputDataPtrs.data(), inputDimsPtr.data(), + num_inputs, _outputData, convertShapeToDims(_outputShape)); + return true; +} + +void ConcatLayer::configure(const std::vector<const uint8_t *> &inputDataPtrs, + const std::vector<Shape> &inputShapes, int32_t axis, + uint8_t *outputData, const Shape outputShape) +{ + 
_inputDataPtrs = inputDataPtrs; + + for (auto shape : inputShapes) + { + _inputShapes.emplace_back(shape); + _inputType = shape.type; + } + + _axis = axis; + + _outputData = outputData; + _outputShape = outputShape; +} + +void ConcatLayer::run() +{ + if (_inputType == OperandType::TENSOR_FLOAT32) + { + concatenationFloat32(); + } + else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM) + { + throw std::runtime_error{"ConcatLayer : Not tested for TENSOR_QUANT8_ASYMM"}; + // concatenationQuant8(); + } +} + +} // namespace cpu +} // namespace kernel +} // namespace neurun diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.h b/runtimes/neurun/src/kernel/cpu/ConcatLayer.h new file mode 100644 index 000000000..9aacab5e8 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_KERNEL_CPU_CONCATLAYER_H__ +#define __NEURUN_KERNEL_CPU_CONCATLAYER_H__ + +#include <NeuralNetworks.h> + +#include <arm_compute/runtime/IFunction.h> + +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +class ConcatLayer : public ::arm_compute::IFunction +{ +public: + ConcatLayer(); + +public: + bool concatenationFloat32(); + + bool concatenationQuant8(); + + void configure(const std::vector<const uint8_t *> &inputDataPtrs, + const std::vector<Shape> &inputShapes, int32_t axis, uint8_t *outputData, + const Shape outputShape); + + void run(); + +private: + std::vector<const uint8_t *> _inputDataPtrs; + uint8_t *_outputData; + + int32_t _axis; + + std::vector<Shape> _inputShapes; + Shape _outputShape; + + OperandType _inputType; +}; + +} // namespace cpu +} // namespace kernel +} // namespace neurun + +#endif // __NEURUN_KERNEL_CPU_CONCATLAYER_H__ diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc new file mode 100644 index 000000000..81e88e0f0 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConvolutionLayer.h" + +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "kernel/cpu/OperationUtils.h" + +#include <mutex> + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +// If possible we will use this static buffer for the tensor. +static constexpr int kStaticBufferSize = 1605632; +static char static_scratch_buffer[kStaticBufferSize]; +static std::mutex executionMutex; + +#define ANDROID_NN_CONV_PARAMETERS(Type) \ + uint32_t height = getSizeOfDimension(_inputShape, 1); \ + uint32_t width = getSizeOfDimension(_inputShape, 2); \ + uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \ + uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \ + uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ + uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ + uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \ + \ + uint32_t paddingHeight = (uint32_t)_paddingTop; \ + uint32_t paddingWidth = (uint32_t)_paddingLeft; \ + \ + ::tflite::Dims<4> im2colDim; \ + im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \ + im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \ + im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \ + im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \ + \ + im2colDim.strides[0] = 1; \ + for (int i = 1; i < 4; i++) \ + { \ + im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \ + } \ + Type *im2colData = nullptr; \ + uint64_t im2colByteSize = sizeof(Type); \ + std::unique_ptr<Type[]> im2colGuard; \ + for (int i = 0; i < 4; i++) \ + { \ + im2colByteSize *= im2colDim.sizes[i]; \ + } \ + /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \ + if (im2colByteSize >= 0x7fffffff) \ + { \ + std::cout << "Conv size is too large, not enough memory" << std::endl; \ + return false; \ + } \ + if (im2colByteSize <= kStaticBufferSize) \ + { \ + im2colData = 
reinterpret_cast<Type *>(static_scratch_buffer); \ + } \ + else \ + { \ + im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ + if (im2colData == nullptr) \ + { \ + std::cout << "Conv size is too large, not enough memory" << std::endl; \ + return false; \ + } \ + im2colGuard.reset(im2colData); \ + } + +ConvolutionLayer::ConvolutionLayer() + : _inputData(nullptr), _kernelData(nullptr), _outputData(nullptr), _biasData(nullptr), + _inputShape(), _kernelShape(), _outputShape(), _biasShape(), _paddingLeft(0), _paddingTop(0), + _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), + _activation(ANEURALNETWORKS_FUSED_NONE), _inputType(OperandType::SCALAR_FLOAT32) +{ + // DO NOTHING +} + +bool ConvolutionLayer::convFloat32() +{ + ANDROID_NN_CONV_PARAMETERS(float) + + const ::tflite::Dims<4> &kernel_dim = convertShapeToDims(_kernelShape); + const int kernel_width = ArraySize(kernel_dim, 1); + const int kernel_height = ArraySize(kernel_dim, 2); + const bool need_im2col = + _strideWidth != 1 || _strideHeight != 1 || kernel_width != 1 || kernel_height != 1; + + float *im2colDataToPass = nullptr; + if (need_im2col) + { + im2colDataToPass = im2colData; + } + + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + int32_t dilationWidthFactor = 1, dilationHeightFactor = 1; + ::tflite::optimized_ops::Conv( + reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), + reinterpret_cast<const float *>(_kernelData), convertShapeToDims(_kernelShape), + reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), _strideWidth, + _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight, + output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), + convertShapeToDims(_outputShape), im2colDataToPass, im2colDim); + return true; +} + +bool ConvolutionLayer::convQuant8() +{ 
+ ANDROID_NN_CONV_PARAMETERS(uint8_t) + int32_t inputOffset = -_inputShape.offset; + int32_t kernelOffset = -_kernelShape.offset; + int32_t outputOffset = _outputShape.offset; + float real_multiplier = 0.0; + int32_t output_multiplier = 0; + int32_t output_shift = 0; + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + if (!GetQuantizedConvolutionMultipler(_inputShape, _kernelShape, _biasShape, _outputShape, + &real_multiplier) || + !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift)) + { + return false; + } + CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, + &output_activation_max); + static gemmlowp::GemmContext gemm_context; + // Prevent concurrent executions that may access the scratch buffer and + // gemm_context. + std::unique_lock<std::mutex> lock(executionMutex); + // Alow gemmlowp automatically decide how many threads to use. + gemm_context.set_max_num_threads(0); + ::tflite::optimized_ops::Conv( + _inputData, convertShapeToDims(_inputShape), inputOffset, _kernelData, + convertShapeToDims(_kernelShape), kernelOffset, reinterpret_cast<const int32_t *>(_biasData), + convertShapeToDims(_biasShape), _strideWidth, _strideHeight, paddingWidth, paddingHeight, + outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max, + _outputData, convertShapeToDims(_outputShape), im2colData, im2colDim, &gemm_context); + return true; +} + +void ConvolutionLayer::configure(uint8_t *inputData, const Shape inputShape, uint8_t *kernelData, + const Shape kernelShape, uint8_t *biasData, const Shape biasShape, + const uint32_t paddingLeft, const uint32_t paddingRight, + const uint32_t paddingTop, const uint32_t paddingBottom, + const uint32_t strideWidth, const uint32_t strideHeight, + const FuseCode activation, uint8_t *outputData, + const Shape outputShape) +{ + _inputData = inputData; + _inputShape = inputShape; + _inputType = inputShape.type; + _kernelData = 
kernelData; + _kernelShape = kernelShape; + _biasData = biasData; + _biasShape = biasShape; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _activation = activation; + _outputData = outputData; + _outputShape = outputShape; +} + +void ConvolutionLayer::run() +{ + if (_inputType == OperandType::TENSOR_FLOAT32) + { + convFloat32(); + } + else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM) + { + throw std::runtime_error{"ConvolutionLayer : Not tested for TENSOR_QUANT8_ASYMM"}; + // convQuant8(); + } +} + +#undef ANDROID_NN_CONV_PARAMETERS + +} // namespace cpu +} // namespace kernel +} // namespace neurun diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h new file mode 100644 index 000000000..b7afbcec6 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_KERNEL_CPU_CONVOLUTIONLAYER_H__ +#define __NEURUN_KERNEL_CPU_CONVOLUTIONLAYER_H__ + +#include <NeuralNetworks.h> + +#include <arm_compute/runtime/IFunction.h> + +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +class ConvolutionLayer : public ::arm_compute::IFunction +{ +public: + ConvolutionLayer(); + +public: + bool convFloat32(); + + bool convQuant8(); + + void configure(uint8_t *inputData, const Shape inputShape, uint8_t *kernelData, + const Shape kernelShape, uint8_t *biasData, const Shape biasShape, + const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, + const FuseCode activation, uint8_t *outputData, const Shape outputShape); + + void run(); + +private: + uint8_t *_inputData; + uint8_t *_kernelData; + uint8_t *_outputData; + uint8_t *_biasData; + + Shape _inputShape; + Shape _kernelShape; + Shape _outputShape; + Shape _biasShape; + + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + + FuseCode _activation; + + OperandType _inputType; +}; + +} // namespace cpu +} // namespace kernel +} // namespace neurun + +#endif // __NEURUN_KERNEL_CPU_CONVOLUTIONLAYER_H__ diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc new file mode 100644 index 000000000..41b9afc0c --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "FullyConnectedLayer.h" + +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "kernel/cpu/OperationUtils.h" + +#include <mutex> + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +FullyConnectedLayer::FullyConnectedLayer() + : _inputData(nullptr), _weightsData(nullptr), _biasData(nullptr), _outputData(nullptr), + _inputShape(), _weightsShape(), _biasShape(), _outputShape(), + _activation(ANEURALNETWORKS_FUSED_NONE), _inputType(OperandType::SCALAR_FLOAT32) +{ + // DO NOTHING +} + +// executionMutex is used to protect concurrent access of non-threadsafe resources +// like gemmlowp::GemmContext. +// std::mutex is safe for pthreads on Android. +static std::mutex executionMutex; +bool FullyConnectedLayer::fullyConnectedFloat32() +{ + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + // b/80425683, optimized implementation produces incorrect results when the + // number of input elements is the squre of batch_size. 
+ uint32_t batch_size = getSizeOfDimension(_outputShape, 0); + uint32_t input_n_elements = getNumberOfElements(_inputShape); + if (batch_size * batch_size == input_n_elements) + { + ::tflite::reference_ops::FullyConnected( + reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), + reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape), + reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), + output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), + convertShapeToDims(_outputShape)); + } + else + { + ::tflite::optimized_ops::FullyConnected( + reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), + reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape), + reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), + output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), + convertShapeToDims(_outputShape)); + } + return true; +} + +bool FullyConnectedLayer::fullyConnectedQuant8() +{ + int32_t inputOffset = -_inputShape.offset; + int32_t weightsOffset = -_weightsShape.offset; + int32_t outputOffset = _outputShape.offset; + float real_multiplier = 0.0; + int32_t output_multiplier = 0; + int32_t output_shift = 0; + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + // Caution : 'Convolution' can make misleading. It seems it is just math term. + if (!GetQuantizedConvolutionMultipler(_inputShape, _weightsShape, _biasShape, _outputShape, + &real_multiplier) || + !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift)) + { + return false; + } + CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, + &output_activation_max); + static gemmlowp::GemmContext gemm_context; + // Prevent concurrent executions that access gemm_context. 
+ std::unique_lock<std::mutex> lock(executionMutex); + // Alow gemmlowp automatically decide how many threads to use. + gemm_context.set_max_num_threads(0); + ::tflite::optimized_ops::FullyConnected( + _inputData, convertShapeToDims(_inputShape), inputOffset, _weightsData, + convertShapeToDims(_weightsShape), weightsOffset, + reinterpret_cast<const int32_t *>(_biasData), convertShapeToDims(_biasShape), outputOffset, + output_multiplier, output_shift, output_activation_min, output_activation_max, _outputData, + convertShapeToDims(_outputShape), &gemm_context); + return true; +} + +void FullyConnectedLayer::configure(uint8_t *inputData, const Shape inputShape, + uint8_t *weightsData, const Shape weightsShape, + uint8_t *biasData, const Shape biasShape, FuseCode activation, + uint8_t *outputData, const Shape outputShape) +{ + _inputData = inputData; + _inputShape = inputShape; + _inputType = inputShape.type; + _weightsData = weightsData; + _weightsShape = weightsShape; + _biasData = biasData; + _biasShape = biasShape; + _activation = activation; + _outputData = outputData; + _outputShape = outputShape; +} + +void FullyConnectedLayer::run() +{ + if (_inputType == OperandType::TENSOR_FLOAT32) + { + fullyConnectedFloat32(); + } + else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM) + { + throw std::runtime_error{"FullyConnectedLayer : Not tested for TENSOR_QUANT8_ASYMM"}; + // fullyConnectedQuant8(); + } +} + +} // namespace cpu +} // namespace kernel +} // namespace neurun diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h new file mode 100644 index 000000000..b1ba172b0 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_KERNEL_CPU_FULLYCONNECTEDLAYER_H__ +#define __NEURUN_KERNEL_CPU_FULLYCONNECTEDLAYER_H__ + +#include <NeuralNetworks.h> + +#include <arm_compute/runtime/IFunction.h> + +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +class FullyConnectedLayer : public ::arm_compute::IFunction +{ +public: + FullyConnectedLayer(); + +public: + bool fullyConnectedFloat32(); + + bool fullyConnectedQuant8(); + + void configure(uint8_t *inputData, const Shape inputShape, uint8_t *weightsData, + const Shape weightsShape, uint8_t *biasData, const Shape biasShape, + FuseCode activation, uint8_t *outputData, const Shape outputShape); + + void run(); + +private: + uint8_t *_inputData; + uint8_t *_weightsData; + uint8_t *_biasData; + uint8_t *_outputData; + + Shape _inputShape; + Shape _weightsShape; + Shape _biasShape; + Shape _outputShape; + + FuseCode _activation; + + OperandType _inputType; +}; + +} // namespace cpu +} // namespace kernel +} // namespace neurun + +#endif // __NEURUN_KERNEL_CPU_FULLYCONNECTEDLAYER_H__ diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc new file mode 100644 index 000000000..3d96bb401 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MaxPoolLayer.h" + +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +#define MAXPOOLING_PARAMETERS \ + uint32_t height = getSizeOfDimension(_inputShape, 1); \ + uint32_t width = getSizeOfDimension(_inputShape, 2); \ + uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ + uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ + \ + uint32_t paddingHeight = (uint32_t)_paddingTop; \ + uint32_t paddingWidth = (uint32_t)_paddingLeft; + +MaxPoolLayer::MaxPoolLayer() + : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0), + _paddingTop(0), _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), + _kernelWidth(0), _kernelHeight(0), _activation(ANEURALNETWORKS_FUSED_NONE), + _inputType(OperandType::SCALAR_FLOAT32) +{ + // DO NOTHING +} + +bool MaxPoolLayer::maxPoolFloat32() +{ + + MAXPOOLING_PARAMETERS + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + + ::tflite::optimized_ops::MaxPool( + reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth, + _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight, + output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), + convertShapeToDims(_outputShape)); + return true; +} +bool MaxPoolLayer::maxPoolQuant8() +{ + + 
MAXPOOLING_PARAMETERS + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, + &output_activation_max); + + ::tflite::optimized_ops::MaxPool(_inputData, convertShapeToDims(_inputShape), _strideWidth, + _strideHeight, paddingWidth, paddingHeight, _kernelWidth, + _kernelHeight, output_activation_min, output_activation_max, + _outputData, convertShapeToDims(_outputShape)); + return true; +} + +void MaxPoolLayer::configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const FuseCode activation, + uint8_t *outputData, const Shape outputShape) +{ + _inputData = inputData; + + _inputShape = inputShape; + _inputType = inputShape.type; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _kernelWidth = kernelWidth; + _kernelHeight = kernelHeight; + _activation = activation; + _outputData = outputData; + _outputShape = outputShape; +} + +void MaxPoolLayer::run() +{ + if (_inputType == OperandType::TENSOR_FLOAT32) + { + maxPoolFloat32(); + } + else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM) + { + throw std::runtime_error{"MaxPoolLayer : Not tested for TENSOR_QUANT8_ASYMM"}; + // maxPoolQuant8(); + } +} + +#undef MAXPOOLING_PARAMETERS + +} // namespace cpu +} // namespace kernel +} // namespace neurun diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h new file mode 100644 index 000000000..b42efb9f6 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_KERNEL_CPU_MAXPOOLLAYER_H__ +#define __NEURUN_KERNEL_CPU_MAXPOOLLAYER_H__ + +#include <NeuralNetworks.h> + +#include <arm_compute/runtime/IFunction.h> + +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +class MaxPoolLayer : public ::arm_compute::IFunction +{ +public: + MaxPoolLayer(); + +public: + bool maxPoolFloat32(); + + bool maxPoolQuant8(); + + void configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const FuseCode activation, uint8_t *outputData, + const Shape outputShape); + + void run(); + +private: + uint8_t *_inputData; + uint8_t *_outputData; + + Shape _inputShape; + Shape _outputShape; + + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + uint32_t _kernelWidth; + uint32_t _kernelHeight; + + FuseCode _activation; + + OperandType _inputType; +}; + +} // namespace cpu +} // namespace kernel +} // namespace neurun + +#endif // __NEURUN_KERNEL_CPU_MAXPOOLLAYER_H__ diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.cc 
b/runtimes/neurun/src/kernel/cpu/OperationUtils.cc new file mode 100644 index 000000000..5ec2f8e62 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.cc @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernel/cpu/OperationUtils.h" + +#include <cmath> +#include <algorithm> +#include <cassert> + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +uint32_t getNumberOfDimensions(const Shape &shape) { return shape.dimensions.size(); } + +uint32_t getNumberOfElements(const Shape &shape) +{ + uint32_t count = 1; + for (size_t i = 0; i < shape.dimensions.size(); i++) + { + count *= shape.dimensions[i]; + } + return count; +} + +uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx) +{ + if (dimensionIdx >= shape.dimensions.size()) + { + // TODO, log the error + return 0; + } + return shape.dimensions[dimensionIdx]; +} + +bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, + int32_t *right_shift) +{ + assert(double_multiplier >= 0.); + assert(double_multiplier < 1.); + if (double_multiplier == 0.) 
+ { + *quantized_multiplier = 0; + *right_shift = 0; + return true; + } + assert(double_multiplier > 0.); + const double q = std::frexp(double_multiplier, right_shift); + *right_shift *= -1; + int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31))); + assert(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) + { + q_fixed /= 2; + --*right_shift; + } + assert(*right_shift >= 0); + assert(q_fixed <= std::numeric_limits<int32_t>::max()); + *quantized_multiplier = static_cast<int32_t>(q_fixed); + return true; +} + +bool GetQuantizedConvolutionMultipler(const Shape &inputShape, const Shape &filterShape, + const Shape &biasShape, const Shape &outputShape, + float *multiplier) +{ + const float input_product_scale = inputShape.scale * filterShape.scale; + const float bias_scale = biasShape.scale; + const float output_scale = outputShape.scale; + // The following conditions must be guaranteed by the training pipeline. + assert(std::abs(input_product_scale - bias_scale) <= + 1e-6 * std::min(input_product_scale, bias_scale)); + assert(input_product_scale >= 0); + assert(input_product_scale < output_scale); + *multiplier = input_product_scale / output_scale; + return true; +} + +bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier, + int *left_shift) +{ + assert(double_multiplier > 1.); + const double q = std::frexp(double_multiplier, left_shift); + int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31))); + assert(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) + { + q_fixed /= 2; + ++*left_shift; + } + assert(*left_shift >= 0); + assert(q_fixed <= std::numeric_limits<int32_t>::max()); + *quantized_multiplier = static_cast<int32_t>(q_fixed); + return true; +} + +void CalculateActivationRangeFloat(int32_t activation, float *activation_min, float *activation_max) +{ + if (activation == ANEURALNETWORKS_FUSED_RELU) + { + *activation_min = 0.f; + *activation_max = std::numeric_limits<float>::max(); + } + 
else if (activation == ANEURALNETWORKS_FUSED_RELU6) + { + *activation_min = 0.f; + *activation_max = 6.f; + } + else if (activation == ANEURALNETWORKS_FUSED_RELU1) + { + *activation_min = -1.f; + *activation_max = 1.f; + } + else if (activation == ANEURALNETWORKS_FUSED_NONE) + { + *activation_min = std::numeric_limits<float>::lowest(); + *activation_max = std::numeric_limits<float>::max(); + } + else + { + std::cout << "Unsupported fused activation function." << std::endl; + } +} + +void CalculateActivationRangeUint8(int32_t activation, const Shape &outputShape, int32_t *act_min, + int32_t *act_max) +{ + const int32_t qmin = std::numeric_limits<uint8_t>::min(); + const int32_t qmax = std::numeric_limits<uint8_t>::max(); + const auto scale = outputShape.scale; + const auto zero_point = outputShape.offset; + auto quantize = [scale, zero_point](float f) { + return zero_point + static_cast<int32_t>(std::round(f / scale)); + }; + if (activation == ANEURALNETWORKS_FUSED_RELU) + { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = qmax; + } + else if (activation == ANEURALNETWORKS_FUSED_RELU6) + { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = std::min(qmax, quantize(6.0)); + } + else if (activation == ANEURALNETWORKS_FUSED_RELU1) + { + *act_min = std::max(qmin, quantize(-1.0)); + *act_max = std::min(qmax, quantize(1.0)); + } + else if (activation == ANEURALNETWORKS_FUSED_NONE) + { + *act_min = qmin; + *act_max = qmax; + } + else + { + std::cout << "Unsupported fused activation function." << std::endl; + } +} + +int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) +{ + const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) * + (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift); + // Tighten bound using floor. Suppose that we could use the exact value. + // After scaling the difference, the result would be at the maximum. Thus we + // must ensure that our value has lower magnitude. 
+ return static_cast<int32_t>(std::floor(max_input_rescaled)); +} + +Shape getShape(const ::neurun::graph::operand::Object &o) +{ + Shape shape; + + shape.type = static_cast<OperandType>(static_cast<int32_t>(o.typeInfo().type())); + shape.dimensions = std::vector<uint32_t>(o.shape().dims().begin(), o.shape().dims().end()); + shape.scale = o.typeInfo().scale(); + // shape.offset = _offset; + + return shape; +} + +size_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions) +{ + size_t size = 4; + + switch (type) + { + case OperandType::SCALAR_FLOAT32: + case OperandType::SCALAR_INT32: + case OperandType::SCALAR_UINT32: + case OperandType::TENSOR_FLOAT32: + case OperandType::TENSOR_INT32: + size = 4; + break; + case OperandType::TENSOR_QUANT8_ASYMM: + size = 1; + break; + default: + throw std::runtime_error("Not supported operand type."); + break; + } + + for (auto d : dimensions) + { + size *= d; + } + + return size; +} + +} // namespace cpu +} // namespace kernel +} // namespace neurun diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.h b/runtimes/neurun/src/kernel/cpu/OperationUtils.h new file mode 100644 index 000000000..5914d04e3 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ +#define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ + +#include <NeuralNetworks.h> + +#include <iostream> +#include <limits> +#include <vector> + +#include "tensorflow/contrib/lite/kernels/internal/types.h" +#include "graph/operand/Object.h" +#include "graph/operand/DataType.h" + +using OperandType = neurun::graph::operand::DataType; + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +struct Shape +{ + OperandType type; + std::vector<uint32_t> dimensions; + float scale; + int32_t offset; +}; + +uint32_t getNumberOfDimensions(const Shape &shape); + +uint32_t getNumberOfElements(const Shape &shape); + +uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx); + +inline ::tflite::Dims<4> convertShapeToDims(const Shape &shape) +{ + // nnAssert(shape.dimensions.size() <= 4); + ::tflite::Dims<4> dims; + // The dimensions are reversed in Dims<4>. + for (int i = 0; i < 4; ++i) + { + int src = static_cast<int>(shape.dimensions.size()) - i - 1; + if (src >= 0) + { + dims.sizes[i] = static_cast<int>(getSizeOfDimension(shape, src)); + } + else + { + dims.sizes[i] = 1; + } + } + dims.strides[0] = 1; + for (int i = 1; i < 4; i++) + { + dims.strides[i] = dims.strides[i - 1] * dims.sizes[i - 1]; + } + return dims; +} + +__wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, + int32_t *right_shift); + +__wur bool GetQuantizedConvolutionMultipler(const Shape &inputShape, const Shape &filterShape, + const Shape &biasShape, const Shape &outputShape, + float *multiplier); +__wur bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier, + int *left_shift); + +void CalculateActivationRangeFloat(int32_t activation, float *activation_min, + float *activation_max); + +void CalculateActivationRangeUint8(int32_t activation, const Shape &outputShape, int32_t *act_min, + int32_t *act_max); + +int32_t CalculateInputRadius(int 
input_integer_bits, int input_left_shift); + +Shape getShape(const ::neurun::graph::operand::Object &o); + +uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions); + +} // namespace cpu +} // namespace kernel +} // namespace neurun + +#endif // __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ diff --git a/runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc new file mode 100644 index 000000000..377f783e0 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ReshapeLayer.h" + +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +ReshapeLayer::ReshapeLayer() + : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape() +{ + // DO NOTHING +} + +bool ReshapeLayer::reshapeGeneric() +{ + size_t count = sizeOfData(_inputShape.type, _inputShape.dimensions); + memcpy(reinterpret_cast<void *>(_outputData), reinterpret_cast<const void *>(_inputData), count); + return true; +} + +void ReshapeLayer::configure(uint8_t *inputData, const Shape &inputShape, uint8_t *outputData, + const Shape &outputShape) +{ + _inputData = inputData; + _inputShape = inputShape; + _outputData = outputData; + _outputShape = outputShape; +} + +void ReshapeLayer::run() { reshapeGeneric(); } + +} // namespace cpu +} // namespace kernel +} // namespace neurun diff --git a/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h new file mode 100644 index 000000000..395cc1d7f --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_KERNEL_CPU_RESHAPELAYER_H__ +#define __NEURUN_KERNEL_CPU_RESHAPELAYER_H__ + +#include <NeuralNetworks.h> + +#include <arm_compute/runtime/IFunction.h> + +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +class ReshapeLayer : public ::arm_compute::IFunction +{ +public: + ReshapeLayer(); + +public: + bool reshapeGeneric(); + + void configure(uint8_t *inputData, const Shape &inputShape, uint8_t *outputData, + const Shape &outputShape); + + void run(); + +private: + uint8_t *_inputData; + uint8_t *_outputData; + + Shape _inputShape; + Shape _outputShape; +}; + +} // namespace cpu +} // namespace kernel +} // namespace neurun + +#endif // __NEURUN_KERNEL_CPU_RESHAPELAYER_H__ diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc new file mode 100644 index 000000000..4f5a69f2e --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "SoftMaxLayer.h" + +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +SoftMaxLayer::SoftMaxLayer() + : _inputData(nullptr), _outputData(nullptr), _beta(0.0), _inputShape(), _outputShape(), + _inputType(OperandType::SCALAR_FLOAT32) +{ + // DO NOTHING +} + +bool SoftMaxLayer::softmaxFloat32() +{ + ::tflite::Dims<4> dim; + if (getNumberOfDimensions(_inputShape) == 2) + { + uint32_t batch_size = getSizeOfDimension(_inputShape, 0); + uint32_t input_size = getNumberOfElements(_inputShape) / batch_size; + Shape shapeIn4D; + shapeIn4D.dimensions = {batch_size, 1, 1, input_size}; + dim = convertShapeToDims(shapeIn4D); + } + else if (getNumberOfDimensions(_inputShape) == 4) + { + dim = convertShapeToDims(_inputShape); + } + else + { + std::cout << "only 2D and 4D tensors supported" << std::endl; + return false; + } + ::tflite::optimized_ops::Softmax(reinterpret_cast<const float *>(_inputData), dim, _beta, + reinterpret_cast<float *>(_outputData), dim); + return true; +} + +bool SoftMaxLayer::softmaxQuant8() +{ + ::tflite::Dims<4> dim; + if (getNumberOfDimensions(_inputShape) == 2) + { + uint32_t batch_size = getSizeOfDimension(_inputShape, 0); + uint32_t input_size = getNumberOfElements(_inputShape) / batch_size; + Shape shapeIn4D; + shapeIn4D.dimensions = {batch_size, 1, 1, input_size}; + dim = convertShapeToDims(shapeIn4D); + } + else if (getNumberOfDimensions(_inputShape) == 4) + { + dim = convertShapeToDims(_inputShape); + } + else + { + std::cout << "only 2D and 4D tensors supported" << std::endl; + return false; + } + if (_outputShape.offset != 0 || _outputShape.scale != 1.f / 256) + { + std::cout << "incorrect scale / offset for output" << std::endl; + return false; + } + static const int32_t kScaledDiffIntegerBits = 5; + const double input_beta_real_multiplier = std::min( + 1.0 * _beta * _inputShape.scale * (1 << (31 - 
kScaledDiffIntegerBits)), (1ll << 31) - 1.0); + int32_t input_multiplier = 0; + int32_t input_left_shift = 0; + if (!QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier, + &input_left_shift)) + { + return false; + } + float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift); + ::tflite::optimized_ops::Softmax(_inputData, dim, input_multiplier, input_left_shift, diff_min, + _outputData, dim); + return true; +} + +void SoftMaxLayer::configure(uint8_t *inputData, const Shape &inputShape, const float beta, + uint8_t *outputData, const Shape &outputShape) +{ + _inputData = inputData; + _inputShape = inputShape; + _inputType = inputShape.type; + _outputData = outputData; + _outputShape = outputShape; + _beta = beta; +} + +void SoftMaxLayer::run() +{ + if (_inputType == OperandType::TENSOR_FLOAT32) + { + softmaxFloat32(); + } + else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM) + { + throw std::runtime_error{"SoftMaxLayer : Not tested for TENSOR_QUANT8_ASYMM"}; + // softmaxQuant8(); + } +} + +} // namespace cpu +} // namespace kernel +} // namespace neurun diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h new file mode 100644 index 000000000..8057be52f --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_KERNEL_CPU_SOFTMAXLAYER_H__ +#define __NEURUN_KERNEL_CPU_SOFTMAXLAYER_H__ + +#include <NeuralNetworks.h> + +#include <arm_compute/runtime/IFunction.h> + +#include "kernel/cpu/OperationUtils.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +class SoftMaxLayer : public ::arm_compute::IFunction +{ +public: + SoftMaxLayer(); + +public: + bool softmaxFloat32(); + + bool softmaxQuant8(); + + void configure(uint8_t *inputData, const Shape &inputShape, const float beta, uint8_t *outputData, + const Shape &outputShape); + + void run(); + +private: + uint8_t *_inputData; + uint8_t *_outputData; + + float _beta; + + Shape _inputShape; + Shape _outputShape; + + OperandType _inputType; +}; + +} // namespace cpu +} // namespace kernel +} // namespace neurun + +#endif // __NEURUN_KERNEL_CPU_SOFTMAXLAYER_H__ diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc new file mode 100644 index 000000000..00e914732 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
//

#if 0

#include "TensorConvertFromCommonLayer.h"

#include "internal/nnapi/feature/Reader.h"
#include "internal/nnapi/feature/View.h"

#include <util/feature/IndexIterator.h>

namespace neurun
{
namespace kernel
{
namespace cpu
{

// Copies a "common" (NNAPI-layout) tensor into a CPU backend tensor.
// Supports rank-2 and rank-4 tensors only; returns false otherwise.
// BUGFIX: the original version fell off the end of this bool function
// without returning a value on any path (undefined behavior).
bool TensorConvertFromCommonLayer::convert()
{
  auto inputBuffer = _inputTensor->buffer();
  auto inputSize = _inputTensor->info()->total_size();

  auto outputBuffer = _outputTensor->buffer();
  auto outputSize = _outputTensor->info()->total_size();

  if (_tensorShape.rank() == 2)
  {
    // Element-wise copy along the second dimension through ACL coordinates.
    const auto len = _tensorShape.dim(1);

    auto base = reinterpret_cast<const float *>(inputBuffer);

    for (int32_t n = 0; n < len; ++n)
    {
      auto from = base + n;
      auto into =
          reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n}));

      *into = *from;
    }
    return true;
  }
  else if (_tensorShape.rank() == 4)
  {
    // Feature-map copy via NNAPI reader/view over the raw buffers.
    auto featureShape = _tensorShape.asFeature();

    const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
    ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};

    ::nnfw::util::feature::iterate(featureShape)
        << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
             const auto value = from.at(batch, ch, row, col);
             into.at(batch, ch, row, col) = value;
           };
    return true;
  }

  // Unsupported rank.
  return false;
}

// Stores tensor pointers and the logical shape for run(); no ownership taken.
void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor,
                                             ::internal::cpu::Tensor *outputTensor,
                                             const Shape &tensorShape)
{
  _inputTensor = inputTensor;
  _outputTensor = outputTensor;
  _tensorShape = tensorShape;
}

void TensorConvertFromCommonLayer::run() { convert(); }

} // namespace cpu
} // namespace kernel
} // namespace neurun

#endif
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
//

#if 0

#ifndef __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
#define __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__

#include <NeuralNetworks.h>

#include <arm_compute/runtime/IFunction.h>

#include "internal/Model.h"
#include "internal/common/Tensor.h"
#include "internal/cpu.h"

namespace neurun
{
namespace kernel
{
namespace cpu
{

// Kernel that copies a "common" (NNAPI-layout) tensor into a CPU backend
// tensor. Only rank-2 and rank-4 shapes are handled by convert().
class TensorConvertFromCommonLayer : public ::arm_compute::IFunction
{
public:
  TensorConvertFromCommonLayer() {}

public:
  // Performs the copy; returns false for unsupported ranks.
  bool convert();

  // Records tensor pointers and shape for run(); no ownership is taken.
  void configure(::internal::common::Tensor *inputTensor, ::internal::cpu::Tensor *outputTensor,
                 const Shape &tensorShape);

  // IFunction entry point; delegates to convert().
  void run() override;

private:
  // BUGFIX: raw pointers were left uninitialized by the default constructor;
  // initialize them so an unconfigured layer fails predictably.
  ::internal::common::Tensor *_inputTensor = nullptr;
  ::internal::cpu::Tensor *_outputTensor = nullptr;

  Shape _tensorShape{1};
};

} // namespace cpu
} // namespace kernel
} // namespace neurun

#endif // __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__

#endif
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
//

#if 0

#include "TensorConvertToCommonLayer.h"

#include "internal/nnapi/feature/Reader.h"
#include "internal/nnapi/feature/View.h"

#include <util/feature/IndexIterator.h>

namespace neurun
{
namespace kernel
{
namespace cpu
{

// Copies a CPU backend tensor into a "common" (NNAPI-layout) tensor.
// Supports rank-2 and rank-4 tensors only; returns false otherwise.
// BUGFIX: the original version fell off the end of this bool function
// without returning a value on any path (undefined behavior).
bool TensorConvertToCommonLayer::convert()
{
  auto inputBuffer = _inputTensor->buffer();
  auto inputSize = _inputTensor->info()->total_size();

  auto outputBuffer = _outputTensor->buffer();
  auto outputSize = _outputTensor->info()->total_size();

  if (_tensorShape.rank() == 2)
  {
    // Element-wise copy along the second dimension through ACL coordinates.
    const auto len = _tensorShape.dim(1);

    auto base = reinterpret_cast<float *>(outputBuffer);

    for (int32_t n = 0; n < len; ++n)
    {
      auto from = reinterpret_cast<const float *>(
          _inputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
      auto into = base + n;

      *into = *from;
    }
    return true;
  }
  else if (_tensorShape.rank() == 4)
  {
    // Feature-map copy via NNAPI reader/view over the raw buffers.
    auto featureShape = _tensorShape.asFeature();

    const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
    ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};

    ::nnfw::util::feature::iterate(featureShape)
        << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
             const auto value = from.at(batch, ch, row, col);
             into.at(batch, ch, row, col) = value;
           };
    return true;
  }

  // Unsupported rank.
  return false;
}

// Stores tensor pointers and the logical shape for run(); no ownership taken.
void TensorConvertToCommonLayer::configure(::internal::cpu::Tensor *inputTensor,
                                           ::internal::common::Tensor *outputTensor,
                                           const Shape &tensorShape)
{
  _inputTensor = inputTensor;
  _outputTensor = outputTensor;
  _tensorShape = tensorShape;
}

void TensorConvertToCommonLayer::run() { convert(); }

} // namespace cpu
} // namespace kernel
} // namespace neurun

#endif
/*
 * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
//

#if 0

#ifndef __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__
#define __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__

#include <NeuralNetworks.h>

#include <arm_compute/runtime/IFunction.h>

#include "internal/Model.h"
#include "internal/common/Tensor.h"
#include "internal/cpu.h"

namespace neurun
{
namespace kernel
{
namespace cpu
{

// Kernel that copies a CPU backend tensor into a "common" (NNAPI-layout)
// tensor. Only rank-2 and rank-4 shapes are handled by convert().
class TensorConvertToCommonLayer : public ::arm_compute::IFunction
{
public:
  TensorConvertToCommonLayer() {}

public:
  // Performs the copy; returns false for unsupported ranks.
  bool convert();

  // Records tensor pointers and shape for run(); no ownership is taken.
  void configure(::internal::cpu::Tensor *inputTensor, ::internal::common::Tensor *outputTensor,
                 const Shape &tensorShape);

  // IFunction entry point; delegates to convert().
  void run() override;

private:
  // BUGFIX: raw pointers were left uninitialized by the default constructor;
  // initialize them so an unconfigured layer fails predictably.
  ::internal::cpu::Tensor *_inputTensor = nullptr;
  ::internal::common::Tensor *_outputTensor = nullptr;

  Shape _tensorShape{1};
};

} // namespace cpu
} // namespace kernel
} // namespace neurun

#endif // __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__

#endif