diff options
Diffstat (limited to 'runtimes/neurun/src/kernel')
30 files changed, 660 insertions, 861 deletions
diff --git a/runtimes/neurun/src/kernel/acl_cl/CLFunction.h b/runtimes/neurun/src/kernel/acl_cl/CLFunction.h new file mode 100644 index 000000000..f34210c8a --- /dev/null +++ b/runtimes/neurun/src/kernel/acl_cl/CLFunction.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_KERNEL_ACL_CL_CL_FUNCTION_H__ +#define __NEURUN_KERNEL_ACL_CL_CL_FUNCTION_H__ + +#include "exec/interface/IFunction.h" +#include <arm_compute/runtime/IFunction.h> +#include <memory> + +namespace neurun +{ +namespace kernel +{ +namespace acl_cl +{ + +class CLFunction : public ::neurun::exec::IFunction +{ +public: + CLFunction() = delete; + +public: + CLFunction(std::unique_ptr<::arm_compute::IFunction> &&func) + : _func(std::forward<std::unique_ptr<::arm_compute::IFunction>>(func)) + { + // DO NOTHING + } + +public: + void run() override { _func->run(); } + void prepare() override { _func->prepare(); } + +private: + std::unique_ptr<::arm_compute::IFunction> _func; +}; + +} // namespace acl_cl +} // namespace kernel +} // namespace neurun + +#endif // __NEURUN_KERNEL_ACL_CL_CL_FUNCTION_H__ diff --git a/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt b/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt index 857fe6fe6..0658effea 100644 --- a/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt +++ b/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt @@ -4,11 +4,9 @@ add_library(${LIB_NEURUN_KERNEL_ACL_CL} STATIC ${SOURCES}) target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${NNFW_INCLUDE_DIR}) target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${NEURUN_INCLUDE_DIR}) -target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow) # TODO We should not need this target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} arm_compute) -target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} tensorflow-lite) # TODO We should not need this -target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} ${LIB_NEURUN_KERNEL_CPU}) # TODO We should not need this +target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} nnfw_lib_misc) set_target_properties(${LIB_NEURUN_KERNEL_ACL_CL} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${LIB_NEURUN_KERNEL_ACL_CL} PROPERTIES OUTPUT_NAME kernel_acl_cl) diff --git a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc index b75ac90f0..3844317ab 100644 --- a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc +++ b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc @@ -18,23 +18,23 @@ #include <arm_compute/runtime/CL/CLScheduler.h> -#include "backend/acl_cl/kernel/View.h" -#include "logging.h" +#include "util/feature/nchw/View.h" +#include "util/logging.h" namespace { -bool matchSizeExceptAxis(const ::arm_compute::ICLTensor *t1, const ::arm_compute::ICLTensor *t2, - uint32_t axis) +bool matchSizeExceptAxis(const ::neurun::backend::acl_cl::operand::ICLTensor *t1, + const ::neurun::backend::acl_cl::operand::ICLTensor *t2, uint32_t axis) { - assert(t1->info()->num_dimensions() <= 4); - assert(t2->info()->num_dimensions() <= 4); + assert(t1->num_dimensions() <= 4); + assert(t2->num_dimensions() <= 4); for (uint32_t i = 0; i < 4; i++) { if (axis == i) continue; - if (t1->info()->dimension(i) != t2->info()->dimension(i)) + if (t1->dimension(i) != t2->dimension(i)) return false; } return true; @@ -66,10 +66,10 @@ bool ConcatLayer::concatenationFloat32() for (auto input : _input_allocs) { assert(matchSizeExceptAxis(_output_alloc, input, _axis)); - axis_sum += input->info()->dimension(_axis); + axis_sum += input->dimension(_axis); } - assert(_output_alloc->info()->dimension(_axis) == axis_sum); + assert(_output_alloc->dimension(_axis) == axis_sum); } VERBOSE(Concat_RUN) << "START Concat" << std::endl; @@ -81,12 +81,12 @@ bool ConcatLayer::concatenationFloat32() auto &queue = ::arm_compute::CLScheduler::get().queue(); _output_alloc->map(queue); - ::internal::arm_compute::kernel::View<float> output_view{_output_alloc}; + util::feature::nchw::View<float> output_view{_output_alloc}; for (auto input : _input_allocs) { input->map(queue); - const ::internal::arm_compute::kernel::View<float> input_reader{input}; + const util::feature::nchw::View<float> input_reader{input}; for (uint32_t n = 0; n < input_reader.shape().N; n++) { @@ -124,8 +124,9 @@ bool ConcatLayer::concatenationFloat32() return true; } -void ConcatLayer::configure(const std::vector<::arm_compute::ICLTensor *> &input_allocs, - int32_t axis, ::arm_compute::ICLTensor *output_alloc) +void ConcatLayer::configure( + const std::vector<::neurun::backend::acl_cl::operand::ICLTensor *> &input_allocs, int32_t axis, + ::neurun::backend::acl_cl::operand::ICLTensor *output_alloc) { _input_allocs = input_allocs; _output_alloc = output_alloc; diff --git a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h index 4767721fa..d468a6dfb 100644 --- a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h +++ b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h @@ -14,17 +14,17 @@ * limitations under the License. */ -#ifndef __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__ -#define __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__ +#ifndef __NEURUN_KERNEL_ACL_CL_CONCAT_LAYER_H__ +#define __NEURUN_KERNEL_ACL_CL_CONCAT_LAYER_H__ #include <NeuralNetworks.h> -#include <arm_compute/core/CL/ICLTensor.h> #include <arm_compute/runtime/IFunction.h> -#include "graph/operand/DataType.h" +#include "model/operand/DataType.h" +#include "backend/acl_cl/operand/ICLTensor.h" -using OperandType = neurun::graph::operand::DataType; +using OperandType = neurun::model::operand::DataType; namespace neurun { @@ -44,9 +44,9 @@ public: ConcatLayer(); public: - void configure(const std::vector<::arm_compute::ICLTensor *> &input_allocs, + void configure(const std::vector<::neurun::backend::acl_cl::operand::ICLTensor *> &input_allocs, int32_t axis /* NNAPI tensor axis from NHWC order */, - ::arm_compute::ICLTensor *output_alloc); + ::neurun::backend::acl_cl::operand::ICLTensor *output_alloc); void run(); @@ -54,8 +54,8 @@ private: bool concatenationFloat32(); private: - std::vector<::arm_compute::ICLTensor *> _input_allocs; - ::arm_compute::ICLTensor *_output_alloc; + std::vector<::neurun::backend::acl_cl::operand::ICLTensor *> _input_allocs; + ::neurun::backend::acl_cl::operand::ICLTensor *_output_alloc; int32_t _axis; OperandType _input_type; }; @@ -64,4 +64,4 @@ private: } // namespace kernel } // namespace neurun -#endif // __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__ +#endif // __NEURUN_KERNEL_ACL_CL_CONCAT_LAYER_H__ diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc deleted file mode 100644 index fa1d77579..000000000 --- a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// -// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE -// - -#if 0 - -#include "TensorConvertFromCommonLayer.h" - -#include "internal/nnapi/feature/Reader.h" -#include "backend/acl_cl/feature/View.h" - -#include <util/feature/IndexIterator.h> -#include <arm_compute/runtime/CL/CLScheduler.h> - -namespace neurun -{ -namespace kernel -{ -namespace acl_cl -{ - -bool TensorConvertFromCommonLayer::convert() -{ - auto inputBuffer = _inputTensor->buffer(); - auto inputSize = _inputTensor->info()->total_size(); - - auto &queue = ::arm_compute::CLScheduler::get().queue(); - - _outputTensor->map(queue); - - if (_tensorShape.rank() == 2) - { - const auto len = _tensorShape.dim(1); - - auto base = reinterpret_cast<const float *>(inputBuffer); - - for (int32_t n = 0; n < len; ++n) - { - auto from = base + n; - auto into = - reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n})); - - *into = *from; - } - } - else if (_tensorShape.rank() == 4) - { - auto featureShape = _tensorShape.asFeature(); - - const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize}; - ::internal::arm_compute::feature::View<float> into{_outputTensor}; - - ::nnfw::util::feature::iterate(featureShape) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, ch, row, col) = value; - }; - } - - _outputTensor->unmap(queue); -} - -void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor, - ::arm_compute::ICLTensor *outputTensor, - const ::neurun::graph::operand::Shape &tensorShape) -{ - _inputTensor = inputTensor; - _outputTensor = outputTensor; - _tensorShape = tensorShape; -} - -void TensorConvertFromCommonLayer::run() { convert(); } - -} // namespace acl_cl -} // namespace kernel -} // namespace neurun - -#endif diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h deleted file mode 100644 index bd031a106..000000000 --- a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// -// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE -// - -#if 0 - -#ifndef __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__ -#define __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__ - -#include <NeuralNetworks.h> - -#include <arm_compute/runtime/IFunction.h> -#include <arm_compute/core/CL/ICLTensor.h> - -#include "internal/Model.h" -#include "internal/common/Tensor.h" - -namespace neurun -{ -namespace kernel -{ -namespace acl_cl -{ - -class TensorConvertFromCommonLayer : public ::arm_compute::IFunction -{ -public: - TensorConvertFromCommonLayer() {} - -public: - bool convert(); - - void configure(::internal::common::Tensor *inputTensor, ::arm_compute::ICLTensor *outputTensor, - const ::neurun::graph::operand::Shape &tensorShape); - - void run(); - -private: - ::internal::common::Tensor *_inputTensor; - ::arm_compute::ICLTensor *_outputTensor; - - ::neurun::graph::operand::Shape _tensorShape{1}; -}; - -} // namespace acl_cl -} // namespace kernel -} // namespace neurun - -#endif // __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__ - -#endif diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc deleted file mode 100644 index 985524bc3..000000000 --- a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// -// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE -// - -#if 0 - -#include "TensorConvertToCommonLayer.h" - -#include "backend/acl_cl/feature/View.h" -#include "internal/nnapi/feature/View.h" - -#include <util/feature/IndexIterator.h> -#include <arm_compute/runtime/CL/CLScheduler.h> - -namespace neurun -{ -namespace kernel -{ -namespace acl_cl -{ - -bool TensorConvertToCommonLayer::convert() -{ - auto outputBuffer = _outputTensor->buffer(); - auto outputSize = _outputTensor->info()->total_size(); - - auto &queue = ::arm_compute::CLScheduler::get().queue(); - - _inputTensor->map(queue); - - if (_tensorShape.rank() == 2) - { - const auto len = _tensorShape.dim(1); - - auto base = reinterpret_cast<float *>(outputBuffer); - - for (int32_t n = 0; n < len; ++n) - { - auto from = reinterpret_cast<const float *>( - _inputTensor->ptr_to_element(::arm_compute::Coordinates{n})); - auto into = base + n; - - *into = *from; - } - } - else if (_tensorShape.rank() == 4) - { - auto featureShape = _tensorShape.asFeature(); - - const ::internal::arm_compute::feature::View<float> from{_inputTensor}; - ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize}; - - ::nnfw::util::feature::iterate(featureShape) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, ch, row, col) = value; - }; - } - - _inputTensor->unmap(queue); -} - -void TensorConvertToCommonLayer::configure(::arm_compute::ICLTensor *inputTensor, - ::internal::common::Tensor *outputTensor, - const ::neurun::graph::operand::Shape &tensorShape) -{ - _inputTensor = inputTensor; - _outputTensor = outputTensor; - _tensorShape = tensorShape; -} - -void TensorConvertToCommonLayer::run() { convert(); } - -} // namespace acl_cl -} // namespace kernel -} // namespace neurun - -#endif diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h deleted file mode 100644 index 576f1ee71..000000000 --- a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// -// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE -// - -#if 0 - -#ifndef __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__ -#define __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__ - -#include <NeuralNetworks.h> - -#include <arm_compute/runtime/IFunction.h> -#include <arm_compute/core/CL/ICLTensor.h> - -#include "internal/Model.h" -#include "internal/common/Tensor.h" - -namespace neurun -{ -namespace kernel -{ -namespace acl_cl -{ - -class TensorConvertToCommonLayer : public ::arm_compute::IFunction -{ -public: - TensorConvertToCommonLayer() {} - -public: - bool convert(); - - void configure(::arm_compute::ICLTensor *inputTensor, ::internal::common::Tensor *outputTensor, - const ::neurun::graph::operand::Shape &tensorShape); - - void run(); - -private: - ::arm_compute::ICLTensor *_inputTensor; - ::internal::common::Tensor *_outputTensor; - - ::neurun::graph::operand::Shape _tensorShape{1}; -}; - -} // namespace acl_cl -} // namespace kernel -} // namespace neurun - -#endif // __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__ - -#endif diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc index 2a6a84e10..f434a6dec 100644 --- a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc +++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc @@ -27,14 +27,14 @@ namespace kernel namespace cpu { -#define AVGPOOLING_PARAMETERS \ - uint32_t height = getSizeOfDimension(_inputShape, 1); \ - uint32_t width = getSizeOfDimension(_inputShape, 2); \ - uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ - uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ - \ - uint32_t paddingHeight = (uint32_t)_paddingTop; \ - uint32_t paddingWidth = (uint32_t)_paddingLeft; +#define AVGPOOLING_PARAMETERS \ + tflite::PoolParams op_params; \ + op_params.stride_height = _strideHeight; \ + op_params.stride_width = _strideWidth; \ + op_params.filter_height = _kernelHeight; \ + op_params.filter_width = _kernelWidth; \ + op_params.padding_values.height = (int8_t)_paddingTop; \ + op_params.padding_values.width = (int8_t)_paddingLeft; AvgPoolLayer::AvgPoolLayer() : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0), @@ -47,31 +47,31 @@ AvgPoolLayer::AvgPoolLayer() bool AvgPoolLayer::averagePoolFloat32() { - AVGPOOLING_PARAMETERS float output_activation_min, output_activation_max; CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; - ::tflite::optimized_ops::AveragePool( - reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth, - _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight, - output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), - convertShapeToDims(_outputShape)); + ::tflite::optimized_ops::AveragePool(op_params, convertShapeToTFLiteShape(_inputShape), + reinterpret_cast<const float *>(_inputData), + convertShapeToTFLiteShape(_outputShape), + reinterpret_cast<float *>(_outputData)); return true; } bool AvgPoolLayer::averagePoolQuant8() { - AVGPOOLING_PARAMETERS int32_t output_activation_min = 0; int32_t output_activation_max = 0; CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; - ::tflite::optimized_ops::AveragePool(_inputData, convertShapeToDims(_inputShape), _strideWidth, - _strideHeight, paddingWidth, paddingHeight, _kernelWidth, - _kernelHeight, output_activation_min, output_activation_max, - _outputData, convertShapeToDims(_outputShape)); + ::tflite::optimized_ops::AveragePool(op_params, convertShapeToTFLiteShape(_inputShape), + _inputData, convertShapeToTFLiteShape(_outputShape), + _outputData); return true; } diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h index 9f390a9e1..280f7ae5f 100644 --- a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h +++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h @@ -19,7 +19,7 @@ #include <NeuralNetworks.h> -#include <arm_compute/runtime/IFunction.h> +#include "exec/interface/IFunction.h" #include "kernel/cpu/OperationUtils.h" @@ -30,7 +30,7 @@ namespace kernel namespace cpu { -class AvgPoolLayer : public ::arm_compute::IFunction +class AvgPoolLayer : public ::neurun::exec::IFunction { public: AvgPoolLayer(); diff --git a/runtimes/neurun/src/kernel/cpu/CMakeLists.txt b/runtimes/neurun/src/kernel/cpu/CMakeLists.txt index dddf154c3..436cb898c 100644 --- a/runtimes/neurun/src/kernel/cpu/CMakeLists.txt +++ b/runtimes/neurun/src/kernel/cpu/CMakeLists.txt @@ -6,8 +6,8 @@ target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${NNFW_INCLUDE_DIR}) target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${NEURUN_INCLUDE_DIR}) target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow) -target_link_libraries(${LIB_NEURUN_KERNEL_CPU} arm_compute) # TODO We should not need this target_link_libraries(${LIB_NEURUN_KERNEL_CPU} tensorflow-lite) +target_link_libraries(${LIB_NEURUN_KERNEL_CPU} nnfw_lib_misc) set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES OUTPUT_NAME kernel_cpu) diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc index 5fe5e3993..be093b437 100644 --- a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc +++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc @@ -24,6 +24,7 @@ namespace neurun { namespace kernel { + namespace cpu { @@ -36,13 +37,21 @@ ConcatLayer::ConcatLayer() bool ConcatLayer::concatenationFloat32() { - int num_inputs = _inputShapes.size(); - std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs); - std::vector<::tflite::Dims<4>> inputDims(num_inputs); - for (int i = 0; i < num_inputs; i++) + uint32_t num_inputs = _inputShapes.size(); + + tflite::ConcatenationParams op_params; + op_params.axis = _axis; + op_params.inputs_count = num_inputs; + + std::vector<::tflite::RuntimeShape *> inputDimsPtr; + std::vector<::tflite::RuntimeShape> inputDims; + inputDimsPtr.reserve(num_inputs); + inputDims.reserve(num_inputs); + + for (uint32_t i = 0; i < num_inputs; i++) { - inputDims[i] = convertShapeToDims(_inputShapes[i]); - inputDimsPtr[i] = &inputDims[i]; + inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i])); + inputDimsPtr.push_back(&inputDims[i]); } std::vector<const float *> inputFloatPtrs; @@ -52,24 +61,44 @@ bool ConcatLayer::concatenationFloat32() inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr)); } - ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, float>( - getNumberOfDimensions(_outputShape) - _axis - 1, inputFloatPtrs.data(), inputDimsPtr.data(), - num_inputs, reinterpret_cast<float *>(_outputData), convertShapeToDims(_outputShape)); + ::tflite::optimized_ops::Concatenation<float>( + op_params, inputDimsPtr.data(), inputFloatPtrs.data(), + convertShapeToTFLiteShape(_outputShape), reinterpret_cast<float *>(_outputData)); return true; } bool ConcatLayer::concatenationQuant8() { int num_inputs = _inputShapes.size(); - std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs); - std::vector<::tflite::Dims<4>> inputDims(num_inputs); - for (int i = 0; i < num_inputs; i++) + + std::vector<int32_t> input_zeropoints(num_inputs); + std::vector<float> input_scales(num_inputs); + for (uint32_t i = 0; i < num_inputs; i++) { - inputDims[i] = convertShapeToDims(_inputShapes[i]); - inputDimsPtr[i] = &inputDims[i]; + input_zeropoints[i] = _inputShapes[i].offset; + input_scales[i] = _inputShapes[i].scale; } - ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, uint8_t>( - getNumberOfDimensions(_outputShape) - _axis - 1, _inputDataPtrs.data(), inputDimsPtr.data(), - num_inputs, _outputData, convertShapeToDims(_outputShape)); + + tflite::ConcatenationParams op_params; + op_params.axis = _axis; + op_params.inputs_count = num_inputs; + op_params.input_zeropoint = input_zeropoints.data(); + op_params.input_scale = input_scales.data(); + op_params.output_zeropoint = _outputShape.offset; + op_params.output_scale = _outputShape.scale; + + std::vector<::tflite::RuntimeShape *> inputDimsPtr; + std::vector<::tflite::RuntimeShape> inputDims; + inputDimsPtr.reserve(num_inputs); + inputDims.reserve(num_inputs); + for (uint32_t i = 0; i < num_inputs; i++) + { + inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i])); + inputDimsPtr.push_back(&inputDims[i]); + } + + ::tflite::optimized_ops::Concatenation<uint8_t>( + op_params, inputDimsPtr.data(), _inputDataPtrs.data(), + convertShapeToTFLiteShape(_outputShape), _outputData); return true; } diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.h b/runtimes/neurun/src/kernel/cpu/ConcatLayer.h index 9aacab5e8..64f813508 100644 --- a/runtimes/neurun/src/kernel/cpu/ConcatLayer.h +++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.h @@ -20,7 +20,7 @@ #include <NeuralNetworks.h> -#include <arm_compute/runtime/IFunction.h> +#include "exec/interface/IFunction.h" #include "kernel/cpu/OperationUtils.h" @@ -31,7 +31,7 @@ namespace kernel namespace cpu { -class ConcatLayer : public ::arm_compute::IFunction +class ConcatLayer : public ::neurun::exec::IFunction { public: ConcatLayer(); diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc index 81e88e0f0..c694fa75f 100644 --- a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc +++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc @@ -33,55 +33,51 @@ static constexpr int kStaticBufferSize = 1605632; static char static_scratch_buffer[kStaticBufferSize]; static std::mutex executionMutex; -#define ANDROID_NN_CONV_PARAMETERS(Type) \ - uint32_t height = getSizeOfDimension(_inputShape, 1); \ - uint32_t width = getSizeOfDimension(_inputShape, 2); \ - uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \ - uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \ - uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ - uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ - uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \ - \ - uint32_t paddingHeight = (uint32_t)_paddingTop; \ - uint32_t paddingWidth = (uint32_t)_paddingLeft; \ - \ - ::tflite::Dims<4> im2colDim; \ - im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \ - im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \ - im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \ - im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \ - \ - im2colDim.strides[0] = 1; \ - for (int i = 1; i < 4; i++) \ - { \ - im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \ - } \ - Type *im2colData = nullptr; \ - uint64_t im2colByteSize = sizeof(Type); \ - std::unique_ptr<Type[]> im2colGuard; \ - for (int i = 0; i < 4; i++) \ - { \ - im2colByteSize *= im2colDim.sizes[i]; \ - } \ - /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \ - if (im2colByteSize >= 0x7fffffff) \ - { \ - std::cout << "Conv size is too large, not enough memory" << std::endl; \ - return false; \ - } \ - if (im2colByteSize <= kStaticBufferSize) \ - { \ - im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ - } \ - else \ - { \ - im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ - if (im2colData == nullptr) \ - { \ - std::cout << "Conv size is too large, not enough memory" << std::endl; \ - return false; \ - } \ - im2colGuard.reset(im2colData); \ +#define ANDROID_NN_CONV_PARAMETERS(Type) \ + uint32_t height = getSizeOfDimension(_inputShape, 1); \ + uint32_t width = getSizeOfDimension(_inputShape, 2); \ + uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \ + uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \ + uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ + uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ + uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \ + \ + uint32_t paddingHeight = (uint32_t)_paddingTop; \ + uint32_t paddingWidth = (uint32_t)_paddingLeft; \ + \ + Shape im2colShape; \ + im2colShape.dimensions.resize(4); \ + im2colShape.dimensions[0] = getSizeOfDimension(_outputShape, 0); \ + im2colShape.dimensions[1] = getSizeOfDimension(_outputShape, 1); \ + im2colShape.dimensions[2] = getSizeOfDimension(_outputShape, 2); \ + im2colShape.dimensions[3] = inDepth * kernelHeight * kernelWidth; \ + \ + Type *im2colData = nullptr; \ + uint64_t im2colByteSize = sizeof(Type); \ + std::unique_ptr<Type[]> im2colGuard; \ + for (int i = 0; i < 4; i++) \ + { \ + im2colByteSize *= im2colShape.dimensions[i]; \ + } \ + /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \ + if (im2colByteSize >= 0x7fffffff) \ + { \ + std::cout << "Conv size is too large, not enough memory" << std::endl; \ + return false; \ + } \ + if (im2colByteSize <= kStaticBufferSize) \ + { \ + im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ + } \ + else \ + { \ + im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ + if (im2colData == nullptr) \ + { \ + std::cout << "Conv size is too large, not enough memory" << std::endl; \ + return false; \ + } \ + im2colGuard.reset(im2colData); \ } ConvolutionLayer::ConvolutionLayer() @@ -112,19 +108,32 @@ bool ConvolutionLayer::convFloat32() float output_activation_min, output_activation_max; CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); int32_t dilationWidthFactor = 1, dilationHeightFactor = 1; + + ::tflite::ConvParams op_params; + op_params.padding_type = ::tflite::PaddingType::kSame; + op_params.padding_values.width = paddingWidth; + op_params.padding_values.height = paddingHeight; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = dilationWidthFactor; + op_params.dilation_height_factor = dilationHeightFactor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + ::tflite::optimized_ops::Conv( - reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), - reinterpret_cast<const float *>(_kernelData), convertShapeToDims(_kernelShape), - reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), _strideWidth, - _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight, - output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), - convertShapeToDims(_outputShape), im2colDataToPass, im2colDim); + op_params, convertShapeToTFLiteShape(_inputShape), + reinterpret_cast<const float *>(_inputData), convertShapeToTFLiteShape(_kernelShape), + reinterpret_cast<const float *>(_kernelData), convertShapeToTFLiteShape(_biasShape), + reinterpret_cast<const float *>(_biasData), convertShapeToTFLiteShape(_outputShape), + reinterpret_cast<float *>(_outputData), convertShapeToTFLiteShape(im2colShape), + im2colDataToPass); return true; } bool ConvolutionLayer::convQuant8() { ANDROID_NN_CONV_PARAMETERS(uint8_t) + int32_t inputOffset = -_inputShape.offset; int32_t kernelOffset = -_kernelShape.offset; int32_t outputOffset = _outputShape.offset; @@ -141,6 +150,24 @@ bool ConvolutionLayer::convQuant8() } CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, &output_activation_max); + int32_t dilationWidthFactor = 1, dilationHeightFactor = 1; + + ::tflite::ConvParams op_params; + op_params.padding_type = ::tflite::PaddingType::kSame; + op_params.padding_values.width = paddingWidth; + op_params.padding_values.height = paddingHeight; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = dilationWidthFactor; + op_params.dilation_height_factor = dilationHeightFactor; + op_params.input_offset = inputOffset; + op_params.weights_offset = kernelOffset; + op_params.output_offset = outputOffset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + static gemmlowp::GemmContext gemm_context; // Prevent concurrent executions that may access the scratch buffer and // gemm_context. @@ -148,11 +175,10 @@ bool ConvolutionLayer::convQuant8() // Alow gemmlowp automatically decide how many threads to use. gemm_context.set_max_num_threads(0); ::tflite::optimized_ops::Conv( - _inputData, convertShapeToDims(_inputShape), inputOffset, _kernelData, - convertShapeToDims(_kernelShape), kernelOffset, reinterpret_cast<const int32_t *>(_biasData), - convertShapeToDims(_biasShape), _strideWidth, _strideHeight, paddingWidth, paddingHeight, - outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max, - _outputData, convertShapeToDims(_outputShape), im2colData, im2colDim, &gemm_context); + op_params, convertShapeToTFLiteShape(_inputShape), _inputData, + convertShapeToTFLiteShape(_kernelShape), _kernelData, convertShapeToTFLiteShape(_biasShape), + reinterpret_cast<const int32_t *>(_biasData), convertShapeToTFLiteShape(_outputShape), + _outputData, convertShapeToTFLiteShape(im2colShape), im2colData, &gemm_context); return true; } diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h index b7afbcec6..9b7f55ff1 100644 --- a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h +++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h @@ -19,7 +19,7 @@ #include <NeuralNetworks.h> -#include <arm_compute/runtime/IFunction.h> +#include "exec/interface/IFunction.h" #include "kernel/cpu/OperationUtils.h" @@ -30,7 +30,7 @@ namespace kernel namespace cpu { -class ConvolutionLayer : public ::arm_compute::IFunction +class ConvolutionLayer : public ::neurun::exec::IFunction { public: ConvolutionLayer(); diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc index 41b9afc0c..abe82db5e 100644 --- a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc +++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc @@ -44,64 +44,39 @@ FullyConnectedLayer::FullyConnectedLayer() static std::mutex executionMutex; bool FullyConnectedLayer::fullyConnectedFloat32() { - float output_activation_min, output_activation_max; - CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); - // b/80425683, optimized implementation produces incorrect results when the - // number of input elements is the squre of batch_size. - uint32_t batch_size = getSizeOfDimension(_outputShape, 0); - uint32_t input_n_elements = getNumberOfElements(_inputShape); - if (batch_size * batch_size == input_n_elements) + int total_input_size = 1; + for (int i = 0; i < _inputShape.dimensions.size(); i++) { - ::tflite::reference_ops::FullyConnected( - reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), - reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape), - reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), - output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), - convertShapeToDims(_outputShape)); - } - else - { - ::tflite::optimized_ops::FullyConnected( - reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), - reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape), - reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), - output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), - convertShapeToDims(_outputShape)); + total_input_size *= _inputShape.dimensions[i]; } + + int input_size = _weightsShape.dimensions[1]; + const int batch_size = total_input_size / input_size; + const int num_units = _weightsShape.dimensions[0]; + + TfLiteFusedActivation act = convertFusedActivation(_activation); + + ::tflite::tensor_utils::VectorBatchVectorAssign(reinterpret_cast<const float *>(_biasData), + num_units, batch_size, + reinterpret_cast<float *>(_outputData)); + + // Compute output += weight * input + ::tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( + reinterpret_cast<const float *>(_weightsData), num_units, input_size, + reinterpret_cast<const float *>(_inputData), batch_size, + reinterpret_cast<float *>(_outputData), /*result_stride=*/1); + + // Apply activation function + ::tflite::tensor_utils::ApplyActivationToVector(reinterpret_cast<float *>(_outputData), + batch_size * num_units, act, + reinterpret_cast<float *>(_outputData)); + return true; } bool FullyConnectedLayer::fullyConnectedQuant8() { - int32_t inputOffset = -_inputShape.offset; - int32_t weightsOffset = -_weightsShape.offset; - int32_t outputOffset = _outputShape.offset; - float real_multiplier = 0.0; - int32_t output_multiplier = 0; - int32_t output_shift = 0; - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - // Caution : 'Convolution' can make misleading. It seems it is just math term. - if (!GetQuantizedConvolutionMultipler(_inputShape, _weightsShape, _biasShape, _outputShape, - &real_multiplier) || - !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift)) - { - return false; - } - CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, - &output_activation_max); - static gemmlowp::GemmContext gemm_context; - // Prevent concurrent executions that access gemm_context. - std::unique_lock<std::mutex> lock(executionMutex); - // Alow gemmlowp automatically decide how many threads to use. - gemm_context.set_max_num_threads(0); - ::tflite::optimized_ops::FullyConnected( - _inputData, convertShapeToDims(_inputShape), inputOffset, _weightsData, - convertShapeToDims(_weightsShape), weightsOffset, - reinterpret_cast<const int32_t *>(_biasData), convertShapeToDims(_biasShape), outputOffset, - output_multiplier, output_shift, output_activation_min, output_activation_max, _outputData, - convertShapeToDims(_outputShape), &gemm_context); - return true; + throw std::runtime_error{"FullyConnectedLayer : Not tested for TENSOR_QUANT8_ASYMM"}; } void FullyConnectedLayer::configure(uint8_t *inputData, const Shape inputShape, diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h index b1ba172b0..20a388349 100644 --- a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h +++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h @@ -19,7 +19,7 @@ #include <NeuralNetworks.h> -#include <arm_compute/runtime/IFunction.h> +#include "exec/interface/IFunction.h" #include "kernel/cpu/OperationUtils.h" @@ -30,7 +30,7 @@ namespace kernel namespace cpu { -class FullyConnectedLayer : public ::arm_compute::IFunction +class FullyConnectedLayer : public ::neurun::exec::IFunction { public: FullyConnectedLayer(); diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc index 3d96bb401..c4a288b07 100644 --- a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc +++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc @@ -26,14 +26,14 @@ namespace kernel namespace cpu { -#define MAXPOOLING_PARAMETERS \ - uint32_t height = getSizeOfDimension(_inputShape, 1); \ - uint32_t width = getSizeOfDimension(_inputShape, 2); \ - uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ - uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ - \ - uint32_t paddingHeight = (uint32_t)_paddingTop; \ - uint32_t paddingWidth = (uint32_t)_paddingLeft; +#define MAXPOOLING_PARAMETERS \ + tflite::PoolParams op_params; \ + op_params.stride_height = _strideHeight; \ + op_params.stride_width = _strideWidth; \ + op_params.filter_height = _kernelHeight; \ + op_params.filter_width = _kernelWidth; \ + op_params.padding_values.height = (int8_t)_paddingTop; \ + op_params.padding_values.width = (int8_t)_paddingLeft; MaxPoolLayer::MaxPoolLayer() : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0), @@ -46,31 +46,30 @@ MaxPoolLayer::MaxPoolLayer() bool MaxPoolLayer::maxPoolFloat32() { - MAXPOOLING_PARAMETERS float output_activation_min, output_activation_max; CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; - ::tflite::optimized_ops::MaxPool( - reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth, - _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight, - output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), - convertShapeToDims(_outputShape)); + ::tflite::optimized_ops::MaxPool(op_params, convertShapeToTFLiteShape(_inputShape), + reinterpret_cast<const float *>(_inputData), + convertShapeToTFLiteShape(_outputShape), + reinterpret_cast<float *>(_outputData)); return true; } bool MaxPoolLayer::maxPoolQuant8() { - MAXPOOLING_PARAMETERS int32_t output_activation_min = 0; int32_t output_activation_max = 0; CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; - ::tflite::optimized_ops::MaxPool(_inputData, convertShapeToDims(_inputShape), _strideWidth, - _strideHeight, paddingWidth, paddingHeight, _kernelWidth, - _kernelHeight, output_activation_min, output_activation_max, - _outputData, convertShapeToDims(_outputShape)); + ::tflite::optimized_ops::MaxPool(op_params, convertShapeToTFLiteShape(_inputShape), _inputData, + convertShapeToTFLiteShape(_outputShape), _outputData); return true; } diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h index b42efb9f6..2b185550b 100644 --- a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h +++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h @@ -19,7 +19,7 @@ #include <NeuralNetworks.h> -#include <arm_compute/runtime/IFunction.h> +#include "exec/interface/IFunction.h" #include "kernel/cpu/OperationUtils.h" @@ -30,7 +30,7 @@ namespace kernel namespace cpu { -class MaxPoolLayer : public ::arm_compute::IFunction +class MaxPoolLayer : public ::neurun::exec::IFunction { public: MaxPoolLayer(); diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.cc b/runtimes/neurun/src/kernel/cpu/OperationUtils.cc index 5ec2f8e62..b28508c27 100644 --- a/runtimes/neurun/src/kernel/cpu/OperationUtils.cc +++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.cc @@ -184,7 +184,7 @@ int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) return static_cast<int32_t>(std::floor(max_input_rescaled)); } -Shape getShape(const ::neurun::graph::operand::Object &o) +Shape getShape(const ::neurun::model::operand::Object &o) { Shape shape; diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.h b/runtimes/neurun/src/kernel/cpu/OperationUtils.h index 5914d04e3..3610990a5 100644 --- a/runtimes/neurun/src/kernel/cpu/OperationUtils.h +++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.h @@ -23,11 +23,13 @@ #include <limits> #include <vector> +#include "tensorflow/contrib/lite/c/builtin_op_data.h" #include "tensorflow/contrib/lite/kernels/internal/types.h" -#include "graph/operand/Object.h" -#include "graph/operand/DataType.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "model/operand/Object.h" +#include "model/operand/DataType.h" -using OperandType = neurun::graph::operand::DataType; +using OperandType = neurun::model::operand::DataType; namespace neurun { @@ -75,6 +77,51 @@ inline ::tflite::Dims<4> convertShapeToDims(const Shape &shape) return dims; } +inline ::tflite::RuntimeShape convertShapeToTFLiteShape(const Shape &shape) +{ + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i >= shape.dimensions.size()) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = shape.dimensions[i]; + } + } + + return ::tflite::GetTensorShape(raw_shape); +} + +inline TfLiteFusedActivation convertFusedActivation(FuseCode act) +{ + if (act == ANEURALNETWORKS_FUSED_NONE) + { + return kTfLiteActNone; + } + + if (act == ANEURALNETWORKS_FUSED_RELU) + { + return kTfLiteActRelu; + } + + if (act == ANEURALNETWORKS_FUSED_RELU1) + { + return kTfLiteActRelu1; + } + + if (act == ANEURALNETWORKS_FUSED_RELU6) + { + return kTfLiteActRelu6; + } + + return kTfLiteActNone; +} + __wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int32_t *right_shift); @@ -92,7 +139,7 @@ void CalculateActivationRangeUint8(int32_t activation, const Shape &outputShape, int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift); -Shape getShape(const ::neurun::graph::operand::Object &o); +Shape getShape(const ::neurun::model::operand::Object &o); uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions); diff --git a/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc new file mode 100644 index 000000000..ba8c5ab92 --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "PermuteLayer.h" + +#include "util/feature/nhwc/Reader.h" +#include "util/feature/nhwc/View.h" +#include "util/feature/nchw/View.h" +#include "util/feature/Coordinate4D.h" + +#include <misc/feature/IndexIterator.h> + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +using Type = model::operation::PermuteNode::Type; + +void PermuteLayer::configure(std::shared_ptr<::neurun::backend::operand::IObject> input, + std::shared_ptr<::neurun::backend::operand::IObject> output, + const model::operand::Shape &shape, Type type) +{ + _input = input; + _output = output; + _shape = shape; + _type = type; +} + +void PermuteLayer::run() +{ + auto rank = _shape.rank(); + + switch (_type) + { + case Type::NHWC_TO_NCHW: + { + auto fn = [&](::neurun::backend::operand::ITensor &tensor) { + auto input_tensor = _input->ptr(); + + auto input_buffer = input_tensor->buffer(); + auto input_size = input_tensor->total_size(); + + auto output_buffer = tensor.buffer(); + auto output_size = tensor.total_size(); + switch (rank) + { + case 0: + case 1: + { + memcpy(output_buffer, input_buffer, input_size); + break; + } + case 2: + { + auto matrix_shape = _shape.asMatrix(); + + for (auto h = 0; h < matrix_shape.H; ++h) + { + neurun::util::feature::Coordinate4D coord{0, h, 0, 0}; + memcpy(output_buffer + tensor.calcOffset(coord), input_buffer + h * matrix_shape.W, + matrix_shape.W * sizeof(float)); + } + break; + } + case 3: + { + const int32_t depth = _shape.dim(0); + const int32_t height = _shape.dim(1); + const int32_t width = _shape.dim(2); + + for (auto c = 0; c < depth; ++c) + { + for (auto h = 0; h < height; ++h) + { + neurun::util::feature::Coordinate4D coord{0, h, 0, c}; + memcpy(output_buffer + tensor.calcOffset(coord), + input_buffer + c * height * width + h * width, width * sizeof(float)); + } + } + break; + } + case 4: + { + auto feature = _shape.asFeature(); + + const util::feature::nhwc::Reader<float> from{ + feature, reinterpret_cast<const float *>(input_buffer), input_size}; + util::feature::nchw::View<float> into{&tensor}; + + ::nnfw::misc::feature::iterate(feature) + << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { + const auto value = from.at(batch, ch, row, col); + into.at(batch, ch, row, col) = value; + }; + break; + } + default: + throw "NYI"; + break; + } + }; + _output->access(fn); + break; + } + case Type::NCHW_TO_NHWC: + { + auto fn = [&](::neurun::backend::operand::ITensor &tensor) { + auto input_buffer = tensor.buffer(); + auto input_size = tensor.total_size(); + + auto output_tensor = _output->ptr(); + + auto output_buffer = output_tensor->buffer(); + auto output_size = output_tensor->total_size(); + + switch (rank) + { + case 0: + case 1: + { + memcpy(output_buffer, input_buffer, output_size); + break; + } + case 2: + { + auto matrix_shape = _shape.asMatrix(); + + for (auto h = 0; h < matrix_shape.H; ++h) + { + neurun::util::feature::Coordinate4D coord{0, h, 0, 0}; + memcpy(output_buffer + h * matrix_shape.W, input_buffer + tensor.calcOffset(coord), + matrix_shape.W * sizeof(float)); + } + break; + } + case 3: + { + const int32_t depth = _shape.dim(0); + const int32_t height = _shape.dim(1); + const int32_t width = _shape.dim(2); + + for (auto c = 0; c < depth; ++c) + { + for (auto h = 0; h < height; ++h) + { + neurun::util::feature::Coordinate4D coord{0, h, 0, c}; + memcpy(output_buffer + c * height * width + h * width, + input_buffer + tensor.calcOffset(coord), width * sizeof(float)); + } + } + break; + } + case 4: + { + auto feature = _shape.asFeature(); + + const util::feature::nchw::View<float> from{&tensor}; + util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(output_buffer), + output_size}; + + ::nnfw::misc::feature::iterate(feature) + << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { + const auto value = from.at(batch, ch, row, col); + into.at(batch, ch, row, col) = value; + }; + break; + } + default: + throw "NYI"; + break; + } + }; + _input->access(fn); + break; + } + case Type::COPY: + // If two different backends using same tensor layout, we need this. + throw "NYI"; + break; + } +} + +} // namespace cpu +} // namespace kernel +} // namespace neurun diff --git a/runtimes/neurun/src/kernel/cpu/PermuteLayer.h b/runtimes/neurun/src/kernel/cpu/PermuteLayer.h new file mode 100644 index 000000000..d9e1709bc --- /dev/null +++ b/runtimes/neurun/src/kernel/cpu/PermuteLayer.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_KERNEL_CPU_PERMUTE_LAYER_H__ +#define __NEURUN_KERNEL_CPU_PERMUTE_LAYER_H__ + +#include <NeuralNetworks.h> + +#include "exec/interface/IFunction.h" + +#include "util/feature/nhwc/View.h" +#include "OperationUtils.h" +#include "backend/interface/operand/IObject.h" +#include "model/operation/PermuteNode.h" + +namespace neurun +{ +namespace kernel +{ +namespace cpu +{ + +class PermuteLayer : public ::neurun::exec::IFunction +{ +public: + PermuteLayer() = default; + +public: + void configure(std::shared_ptr<::neurun::backend::operand::IObject> input, + std::shared_ptr<::neurun::backend::operand::IObject> output, + const model::operand::Shape &shape, model::operation::PermuteNode::Type type); + void run(); + +private: + std::shared_ptr<::neurun::backend::operand::IObject> _input; + std::shared_ptr<::neurun::backend::operand::IObject> _output; + model::operand::Shape _shape; + model::operation::PermuteNode::Type _type; +}; + +} // namespace cpu +} // namespace kernel +} // namespace neurun + +#endif // __NEURUN_KERNEL_CPU_PERMUTE_LAYER_H__ diff --git a/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h index 395cc1d7f..51d0bacee 100644 --- a/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h +++ b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h @@ -19,7 +19,7 @@ #include <NeuralNetworks.h> -#include <arm_compute/runtime/IFunction.h> +#include "exec/interface/IFunction.h" #include "kernel/cpu/OperationUtils.h" @@ -30,7 +30,7 @@ namespace kernel namespace cpu { -class ReshapeLayer : public ::arm_compute::IFunction +class ReshapeLayer : public ::neurun::exec::IFunction { public: ReshapeLayer(); diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc index 4f5a69f2e..c998c65f6 100644 --- a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc +++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc @@ -33,45 +33,86 @@ SoftMaxLayer::SoftMaxLayer() // DO NOTHING } +// Performs softmax along the input of size (input_size * batch_size). +void Softmax(const float *in, const int input_size, const int batch_size, const float beta, + float *out) +{ + TF_LITE_ASSERT(input_size > 0); + + // For each batch + for (int b = 0; b < batch_size; b++) + { + // Find the max coeff. + float max_coeff = in[0]; + for (int i = 1; i < input_size; i++) + { + if (in[i] > max_coeff) + max_coeff = in[i]; + } + + // Compute the normalized sum of exps. + float exp_sum = 0.0; + for (int i = 0; i < input_size; i++) + { + out[i] = std::exp((in[i] - max_coeff) * beta); + exp_sum += out[i]; + } + + // Divide by the sum of exps. + float reciprocal_sum_exp = 1.f / exp_sum; + for (int i = 0; i < input_size; i++) + { + out[i] *= reciprocal_sum_exp; + } + + // Advance in and out pointers for the next batch. + in += input_size; + out += input_size; + } +} + bool SoftMaxLayer::softmaxFloat32() { - ::tflite::Dims<4> dim; + Shape shapeIn4D; + if (getNumberOfDimensions(_inputShape) == 2) { uint32_t batch_size = getSizeOfDimension(_inputShape, 0); uint32_t input_size = getNumberOfElements(_inputShape) / batch_size; - Shape shapeIn4D; - shapeIn4D.dimensions = {batch_size, 1, 1, input_size}; - dim = convertShapeToDims(shapeIn4D); + Softmax(reinterpret_cast<const float *>(_inputData), input_size, batch_size, _beta, + reinterpret_cast<float *>(_outputData)); } else if (getNumberOfDimensions(_inputShape) == 4) { - dim = convertShapeToDims(_inputShape); + ::tflite::SoftmaxParams op_params; + op_params.beta = _beta; + ::tflite::optimized_ops::Softmax(op_params, convertShapeToTFLiteShape(_inputShape), + reinterpret_cast<const float *>(_inputData), + convertShapeToTFLiteShape(_outputShape), + reinterpret_cast<float *>(_outputData)); } else { std::cout << "only 2D and 4D tensors supported" << std::endl; return false; } - ::tflite::optimized_ops::Softmax(reinterpret_cast<const float *>(_inputData), dim, _beta, - reinterpret_cast<float *>(_outputData), dim); + return true; } bool SoftMaxLayer::softmaxQuant8() { - ::tflite::Dims<4> dim; + Shape shapeIn4D = _inputShape; + if (getNumberOfDimensions(_inputShape) == 2) { uint32_t batch_size = getSizeOfDimension(_inputShape, 0); uint32_t input_size = getNumberOfElements(_inputShape) / batch_size; - Shape shapeIn4D; shapeIn4D.dimensions = {batch_size, 1, 1, input_size}; - dim = convertShapeToDims(shapeIn4D); } else if (getNumberOfDimensions(_inputShape) == 4) { - dim = convertShapeToDims(_inputShape); + shapeIn4D = _inputShape; } else { @@ -94,8 +135,13 @@ bool SoftMaxLayer::softmaxQuant8() return false; } float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift); - ::tflite::optimized_ops::Softmax(_inputData, dim, input_multiplier, input_left_shift, diff_min, - _outputData, dim); + + ::tflite::SoftmaxParams op_params; + op_params.input_multiplier = input_multiplier; + op_params.input_left_shift = input_left_shift; + op_params.diff_min = diff_min; + ::tflite::optimized_ops::Softmax(op_params, convertShapeToTFLiteShape(shapeIn4D), _inputData, + convertShapeToTFLiteShape(shapeIn4D), _outputData); return true; } diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h index 8057be52f..df1aa4044 100644 --- a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h +++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h @@ -19,7 +19,7 @@ #include <NeuralNetworks.h> -#include <arm_compute/runtime/IFunction.h> +#include "exec/interface/IFunction.h" #include "kernel/cpu/OperationUtils.h" @@ -30,7 +30,7 @@ namespace kernel namespace cpu { -class SoftMaxLayer : public ::arm_compute::IFunction +class SoftMaxLayer : public ::neurun::exec::IFunction { public: SoftMaxLayer(); diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc deleted file mode 100644 index 00e914732..000000000 --- a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// -// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE -// - -#if 0 - -#include "TensorConvertFromCommonLayer.h" - -#include "internal/nnapi/feature/Reader.h" -#include "internal/nnapi/feature/View.h" - -#include <util/feature/IndexIterator.h> - -namespace neurun -{ -namespace kernel -{ -namespace cpu -{ - -bool TensorConvertFromCommonLayer::convert() -{ - auto inputBuffer = _inputTensor->buffer(); - auto inputSize = _inputTensor->info()->total_size(); - - auto outputBuffer = _outputTensor->buffer(); - auto outputSize = _outputTensor->info()->total_size(); - - if (_tensorShape.rank() == 2) - { - const auto len = _tensorShape.dim(1); - - auto base = reinterpret_cast<const float *>(inputBuffer); - - for (int32_t n = 0; n < len; ++n) - { - auto from = base + n; - auto into = - reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n})); - - *into = *from; - } - } - else if (_tensorShape.rank() == 4) - { - auto featureShape = _tensorShape.asFeature(); - - const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize}; - ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize}; - - ::nnfw::util::feature::iterate(featureShape) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, ch, row, col) = value; - }; - } -} - -void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor, - ::internal::cpu::Tensor *outputTensor, - const Shape &tensorShape) -{ - _inputTensor = inputTensor; - _outputTensor = outputTensor; - _tensorShape = tensorShape; -} - -void TensorConvertFromCommonLayer::run() { convert(); } - -} // namespace cpu -} // namespace kernel -} // namespace neurun - -#endif diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h deleted file mode 100644 index 56f7bcf32..000000000 --- a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// -// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE -// - -#if 0 - -#ifndef __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__ -#define __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__ - -#include <NeuralNetworks.h> - -#include <arm_compute/runtime/IFunction.h> - -#include "internal/Model.h" -#include "internal/common/Tensor.h" -#include "internal/cpu.h" - -namespace neurun -{ -namespace kernel -{ -namespace cpu -{ - -class TensorConvertFromCommonLayer : public ::arm_compute::IFunction -{ -public: - TensorConvertFromCommonLayer() {} - -public: - bool convert(); - - void configure(::internal::common::Tensor *inputTensor, ::internal::cpu::Tensor *outputTensor, - const Shape &tensorShape); - - void run(); - -private: - ::internal::common::Tensor *_inputTensor; - ::internal::cpu::Tensor *_outputTensor; - - Shape _tensorShape{1}; -}; - -} // namespace cpu -} // namespace kernel -} // namespace neurun - -#endif // __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__ - -#endif diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc deleted file mode 100644 index 7d721f494..000000000 --- a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// -// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE -// - -#if 0 - -#include "TensorConvertToCommonLayer.h" - -#include "internal/nnapi/feature/Reader.h" -#include "internal/nnapi/feature/View.h" - -#include <util/feature/IndexIterator.h> - -namespace neurun -{ -namespace kernel -{ -namespace cpu -{ - -bool TensorConvertToCommonLayer::convert() -{ - auto inputBuffer = _inputTensor->buffer(); - auto inputSize = _inputTensor->info()->total_size(); - - auto outputBuffer = _outputTensor->buffer(); - auto outputSize = _outputTensor->info()->total_size(); - - if (_tensorShape.rank() == 2) - { - const auto len = _tensorShape.dim(1); - - auto base = reinterpret_cast<float *>(outputBuffer); - - for (int32_t n = 0; n < len; ++n) - { - auto from = reinterpret_cast<const float *>( - _inputTensor->ptr_to_element(::arm_compute::Coordinates{n})); - auto into = base + n; - - *into = *from; - } - } - else if (_tensorShape.rank() == 4) - { - auto featureShape = _tensorShape.asFeature(); - - const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize}; - ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize}; - - ::nnfw::util::feature::iterate(featureShape) - << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto value = from.at(batch, ch, row, col); - into.at(batch, ch, row, col) = value; - }; - } -} - -void TensorConvertToCommonLayer::configure(::internal::cpu::Tensor *inputTensor, - ::internal::common::Tensor *outputTensor, - const Shape &tensorShape) -{ - _inputTensor = inputTensor; - _outputTensor = outputTensor; - _tensorShape = tensorShape; -} - -void TensorConvertToCommonLayer::run() { convert(); } - -} // namespace cpu -} // namespace kernel -} // namespace neurun - -#endif diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h deleted file mode 100644 index 7e96d1aff..000000000 --- a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// -// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERNCE -// - -#if 0 - -#ifndef __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__ -#define __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__ - -#include <NeuralNetworks.h> - -#include <arm_compute/runtime/IFunction.h> - -#include "internal/Model.h" -#include "internal/common/Tensor.h" -#include "internal/cpu.h" - -namespace neurun -{ -namespace kernel -{ -namespace cpu -{ - -class TensorConvertToCommonLayer : public ::arm_compute::IFunction -{ -public: - TensorConvertToCommonLayer() {} - -public: - bool convert(); - - void configure(::internal::cpu::Tensor *inputTensor, ::internal::common::Tensor *outputTensor, - const Shape &tensorShape); - - void run(); - -private: - ::internal::cpu::Tensor *_inputTensor; - ::internal::common::Tensor *_outputTensor; - - Shape _tensorShape{1}; -}; - -} // namespace cpu -} // namespace kernel -} // namespace neurun - -#endif // __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__ - -#endif |