diff options
Diffstat (limited to 'runtime/neurun/backend/cpu/kernel')
32 files changed, 3317 insertions, 0 deletions
diff --git a/runtime/neurun/backend/cpu/kernel/AddLayer.cc b/runtime/neurun/backend/cpu/kernel/AddLayer.cc new file mode 100644 index 000000000..8a2d872e5 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/AddLayer.cc @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AddLayer.h" + +#include <cker/operation/BinaryArithmeticOps.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +void AddLayer::addFloat32() +{ + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + nnfw::cker::BinaryArithmeticOpParam op_params; + op_params.float_activation_max = output_activation_max; + op_params.float_activation_min = output_activation_min; + + const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) { + return a + b; + }; + + if (!HaveSameShapes(&_lhsDescr, &_rhsDescr)) + { + nnfw::cker::BroadcastBinaryArithmeticOpSlow( + op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f, + convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr), + _outputData.f, fn); + return; + } + + nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr), + _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr), + 
_rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr), + _outputData.f, fn); +} + +void AddLayer::addQuant8() +{ + int32_t output_activation_min, output_activation_max; + CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min, + &output_activation_max); + // nnfw::cker::BinaryArithmeticOpParam op_params; + // op_params.quantized_activation_max = output_activation_max; + // op_params.quantized_activation_min = output_activation_min; + + // cker quant8 add is not implemented yet + throw std::runtime_error{"NYI"}; +} + +void AddLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData, + const TensorDescriptor &rhsDescr, const ir::Activation activation, + uint8_t *outputData, const TensorDescriptor &outputDescr) +{ + _lhsData.u8 = lhsData; + _lhsDescr = lhsDescr; + _rhsData.u8 = rhsData; + _rhsDescr = rhsDescr; + _inputType = lhsDescr.type; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void AddLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + addFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + addQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/AddLayer.h b/runtime/neurun/backend/cpu/kernel/AddLayer.h new file mode 100644 index 000000000..7018e4c48 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/AddLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class AddLayer : public ::neurun::exec::IFunction +{ +public: + AddLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr() + { + // DO NOTHING + } + +public: + void addFloat32(); + + void addQuant8(); + + void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData, + const TensorDescriptor &rhsDescr, const ir::Activation activation, + uint8_t *outputData, const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _lhsData; + DataPtr _rhsData; + DataPtr _outputData; + + TensorDescriptor _lhsDescr; + TensorDescriptor _rhsDescr; + TensorDescriptor _outputDescr; + + ir::Activation _activation{ir::Activation::NONE}; + + OperandType _inputType{OperandType::FLOAT32}; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_ADDLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc new file mode 100644 index 000000000..389955796 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2018 
Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AvgPoolLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/AveragePool.h> + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +#define AVGPOOLING_PARAMETERS \ + nnfw::cker::PoolParams op_params; \ + op_params.stride_height = _strideHeight; \ + op_params.stride_width = _strideWidth; \ + op_params.filter_height = _kernelHeight; \ + op_params.filter_width = _kernelWidth; \ + op_params.padding_values.height = (int8_t)_paddingTop; \ + op_params.padding_values.width = (int8_t)_paddingLeft; + +AvgPoolLayer::AvgPoolLayer() + : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _paddingLeft(0), _paddingTop(0), + _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), + _kernelHeight(0), _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void AvgPoolLayer::averagePoolFloat32() +{ + AVGPOOLING_PARAMETERS + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + nnfw::cker::AveragePool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f, + 
convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f); +} +void AvgPoolLayer::averagePoolQuant8() +{ + AVGPOOLING_PARAMETERS + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + nnfw::cker::AveragePool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8); +} + +void AvgPoolLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, + const uint32_t paddingLeft, const uint32_t paddingRight, + const uint32_t paddingTop, const uint32_t paddingBottom, + const uint32_t strideWidth, const uint32_t strideHeight, + const uint32_t kernelWidth, const uint32_t kernelHeight, + const ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _kernelWidth = kernelWidth; + _kernelHeight = kernelHeight; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void AvgPoolLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + averagePoolFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + averagePoolQuant8(); + } +} + +#undef AVGPOOLING_PARAMETERS + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h new file mode 100644 index 000000000..6339efa41 --- /dev/null +++ 
b/runtime/neurun/backend/cpu/kernel/AvgPoolLayer.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class AvgPoolLayer : public ::neurun::exec::IFunction +{ +public: + AvgPoolLayer(); + +public: + void averagePoolFloat32(); + + void averagePoolQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor inputDescr, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; + + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + uint32_t _kernelWidth; + 
uint32_t _kernelHeight; + + ir::Activation _activation; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_AVGPOOLLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/ConcatLayer.cc b/runtime/neurun/backend/cpu/kernel/ConcatLayer.cc new file mode 100644 index 000000000..471c9b3bb --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/ConcatLayer.cc @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ConcatLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Concatenation.h> + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +ConcatLayer::ConcatLayer() + : _inputDataPtrs(), _outputData(), _axis(0), _inputDescriptors(), _outputDescr(), + _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void ConcatLayer::concatenationFloat32() +{ + uint32_t num_inputs = _inputDescriptors.size(); + + nnfw::cker::ConcatenationParams op_params; + op_params.axis = _axis; + op_params.inputs_count = num_inputs; + + std::vector<nnfw::cker::Shape *> inputDimsPtr; + std::vector<nnfw::cker::Shape> inputDims; + inputDimsPtr.reserve(num_inputs); + inputDims.reserve(num_inputs); + + for (uint32_t i = 0; i < num_inputs; i++) + { + inputDims.push_back(convertTensorDescriptorToCkerShape(_inputDescriptors[i])); + inputDimsPtr.push_back(&inputDims[i]); + } + + std::vector<const float *> inputFloatPtrs; + + for (auto ptr : _inputDataPtrs) + { + inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr)); + } + + nnfw::cker::Concatenation<float>(op_params, inputDimsPtr.data(), inputFloatPtrs.data(), + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f); +} +void ConcatLayer::concatenationQuant8() +{ + uint32_t num_inputs = _inputDescriptors.size(); + + std::vector<int32_t> input_zeropoints(num_inputs); + std::vector<float> input_scales(num_inputs); + for (uint32_t i = 0; i < num_inputs; i++) + { + input_zeropoints[i] = _inputDescriptors[i].offset; + input_scales[i] = _inputDescriptors[i].scale; + } + + nnfw::cker::ConcatenationParams op_params; + op_params.axis = _axis; + op_params.inputs_count = num_inputs; + op_params.input_zeropoint = input_zeropoints.data(); + op_params.input_scale = input_scales.data(); + op_params.output_zeropoint = _outputDescr.offset; + op_params.output_scale = _outputDescr.scale; + + std::vector<nnfw::cker::Shape *> inputDimsPtr; + std::vector<nnfw::cker::Shape> inputDims; + 
inputDimsPtr.reserve(num_inputs); + inputDims.reserve(num_inputs); + for (uint32_t i = 0; i < num_inputs; i++) + { + inputDims.push_back(convertTensorDescriptorToCkerShape(_inputDescriptors[i])); + inputDimsPtr.push_back(&inputDims[i]); + } + + nnfw::cker::Concatenation<uint8_t>(op_params, inputDimsPtr.data(), _inputDataPtrs.data(), + convertTensorDescriptorToCkerShape(_outputDescr), + _outputData.u8); +} + +void ConcatLayer::configure(const std::vector<const uint8_t *> &inputDataPtrs, + const std::vector<TensorDescriptor> &inputDescriptors, int32_t axis, + uint8_t *outputData, const TensorDescriptor outputDescr) +{ + _inputDataPtrs = inputDataPtrs; + + for (auto inputDescr : inputDescriptors) + { + _inputDescriptors.emplace_back(inputDescr); + _inputType = inputDescr.type; + } + + _axis = axis; + + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void ConcatLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + concatenationFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + concatenationQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/ConcatLayer.h b/runtime/neurun/backend/cpu/kernel/ConcatLayer.h new file mode 100644 index 000000000..048aa4208 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/ConcatLayer.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class ConcatLayer : public ::neurun::exec::IFunction +{ +public: + ConcatLayer(); + +public: + void concatenationFloat32(); + + void concatenationQuant8(); + + void configure(const std::vector<const uint8_t *> &inputDataPtrs, + const std::vector<TensorDescriptor> &inputDescriptors, int32_t axis, + uint8_t *outputData, const TensorDescriptor outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + std::vector<const uint8_t *> _inputDataPtrs; + DataPtr _outputData; + + int32_t _axis; + + std::vector<TensorDescriptor> _inputDescriptors; + TensorDescriptor _outputDescr; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_CONCATLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc b/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc new file mode 100644 index 000000000..2fdb0baf7 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ConvolutionLayer.h" + +#include <cker/operation/Conv.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ +ConvolutionLayer::ConvolutionLayer() + : _inputData(), _kernelData(), _outputData(), _biasData(), _inputDescr(), _kernelDescr(), + _outputDescr(), _biasDescr(), _paddingLeft(0), _paddingTop(0), _paddingRight(0), + _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE), + _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void ConvolutionLayer::convFloat32() +{ + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + + nnfw::cker::ConvParams op_params; + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = 1; + op_params.dilation_height_factor = 1; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + nnfw::cker::Conv(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f, + convertTensorDescriptorToCkerShape(_kernelDescr), _kernelData.f, + convertTensorDescriptorToCkerShape(_biasDescr), _biasData.f, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f); +} + +void ConvolutionLayer::convQuant8() +{ + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + 
CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min, + &output_activation_max); + + float real_multiplier = 0.0; + int32_t output_multiplier = 0; + int32_t output_shift = 0; + GetQuantizedConvolutionMultiplier(_inputDescr, _kernelDescr, _biasDescr, _outputDescr, + &real_multiplier); + QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + nnfw::cker::ConvParams op_params; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = 1; + op_params.dilation_height_factor = 1; + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.input_offset = -_inputDescr.offset; + op_params.weights_offset = -_kernelDescr.offset; + op_params.output_offset = _outputDescr.offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + nnfw::cker::Conv(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8, + convertTensorDescriptorToCkerShape(_kernelDescr), _kernelData.u8, + convertTensorDescriptorToCkerShape(_biasDescr), _biasData.i32, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8); +} + +void ConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, + uint8_t *kernelData, const TensorDescriptor kernelDescr, + uint8_t *biasData, const TensorDescriptor biasDescr, + const uint32_t paddingLeft, const uint32_t paddingRight, + const uint32_t paddingTop, const uint32_t paddingBottom, + const uint32_t strideWidth, const uint32_t strideHeight, + const ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _kernelData.u8 = kernelData; + _kernelDescr = kernelDescr; + _biasData.u8 = 
biasData; + _biasDescr = biasDescr; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void ConvolutionLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + convFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + convQuant8(); + } +} + +#undef ANDROID_NN_CONV_PARAMETERS + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h b/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h new file mode 100644 index 000000000..16669f316 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/ConvolutionLayer.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class ConvolutionLayer : public ::neurun::exec::IFunction +{ +public: + ConvolutionLayer(); + +public: + void convFloat32(); + + void convQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData, + const TensorDescriptor kernelDescr, uint8_t *biasData, + const TensorDescriptor biasDescr, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, + const ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _kernelData; + DataPtr _outputData; + DataPtr _biasData; + + TensorDescriptor _inputDescr; + TensorDescriptor _kernelDescr; + TensorDescriptor _outputDescr; + TensorDescriptor _biasDescr; + + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + + ir::Activation _activation; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_CONVOLUTIONLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc b/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc new file mode 100644 index 000000000..e33e3465e --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.cc @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DepthwiseConvolutionLayer.h" + +#include <cker/operation/DepthwiseConv.h> + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +DepthwiseConvolutionLayer::DepthwiseConvolutionLayer() + : _inputData(), _kernelData(), _outputData(), _biasData(), _inputDescr(), _kernelDescr(), + _outputDescr(), _biasDescr(), _paddingLeft(0), _paddingTop(0), _paddingRight(0), + _paddingBottom(0), _strideWidth(0), _strideHeight(0), _multiplier(0), + _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void DepthwiseConvolutionLayer::convFloat32() +{ + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + + nnfw::cker::DepthwiseConvParams op_params; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = 1; + op_params.dilation_height_factor = 1; + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.depth_multiplier = _multiplier; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + nnfw::cker::DepthwiseConv(op_params, convertTensorDescriptorToCkerShape(_inputDescr), + _inputData.f, convertTensorDescriptorToCkerShape(_kernelDescr), + _kernelData.f, 
convertTensorDescriptorToCkerShape(_biasDescr), + _biasData.f, convertTensorDescriptorToCkerShape(_outputDescr), + _outputData.f); +} + +void DepthwiseConvolutionLayer::convQuant8() +{ + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min, + &output_activation_max); + + float real_multiplier = 0.0; + int32_t output_multiplier = 0; + int32_t output_shift = 0; + GetQuantizedConvolutionMultiplier(_inputDescr, _kernelDescr, _biasDescr, _outputDescr, + &real_multiplier); + QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + nnfw::cker::DepthwiseConvParams op_params; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = 1; + op_params.dilation_height_factor = 1; + op_params.padding_values.width = _paddingLeft; + op_params.padding_values.height = _paddingTop; + op_params.depth_multiplier = _multiplier; + op_params.input_offset = -_inputDescr.offset; + op_params.weights_offset = -_kernelDescr.offset; + op_params.output_offset = _outputDescr.offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + nnfw::cker::DepthwiseConv(op_params, convertTensorDescriptorToCkerShape(_inputDescr), + _inputData.u8, convertTensorDescriptorToCkerShape(_kernelDescr), + _kernelData.u8, convertTensorDescriptorToCkerShape(_biasDescr), + _biasData.i32, convertTensorDescriptorToCkerShape(_outputDescr), + _outputData.u8); +} + +void DepthwiseConvolutionLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, + uint8_t *kernelData, const TensorDescriptor kernelDescr, + uint8_t *biasData, const TensorDescriptor biasDescr, + const uint32_t paddingLeft, const uint32_t paddingRight, + const uint32_t paddingTop, const uint32_t 
paddingBottom, + const uint32_t strideWidth, const uint32_t strideHeight, + const uint32_t multiplier, + const ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _kernelData.u8 = kernelData; + _kernelDescr = kernelDescr; + _biasData.u8 = biasData; + _biasDescr = biasDescr; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _multiplier = multiplier; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void DepthwiseConvolutionLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + convFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + convQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h b/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h new file mode 100644 index 000000000..575cc0ab1 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/DepthwiseConvolutionLayer.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__ +#define __NEURUN_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class DepthwiseConvolutionLayer : public ::neurun::exec::IFunction +{ +public: + DepthwiseConvolutionLayer(); + +public: + void convFloat32(); + + void convQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *kernelData, + const TensorDescriptor kernelDescr, uint8_t *biasData, + const TensorDescriptor biasDescr, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH, + const uint32_t multiplier, const ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _kernelData; + DataPtr _outputData; + DataPtr _biasData; + + TensorDescriptor _inputDescr; + TensorDescriptor _kernelDescr; + TensorDescriptor _outputDescr; + TensorDescriptor _biasDescr; + + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + + uint32_t _multiplier; + + ir::Activation _activation; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // backend +} // namespace neurun + +#endif // __NEURUN_KERNEL_CPU_DEPTHWISECONVOLUTIONLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc b/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc new file mode 100644 index 000000000..055f71590 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.cc @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2018 
Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "FullyConnectedLayer.h" + +#include <cker/operation/FullyConnected.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +FullyConnectedLayer::FullyConnectedLayer() + : _inputData(), _weightsData(), _biasData(), _outputData(), _inputDescr(), _weightsDescr(), + _biasDescr(), _outputDescr(), _activation(ir::Activation::NONE), + _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void FullyConnectedLayer::fullyConnectedFloat32() +{ + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + + nnfw::cker::FullyConnectedParams op_params; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + nnfw::cker::FullyConnected(op_params, convertToExtendedCkerShape(_inputDescr), _inputData.f, + convertToExtendedCkerShape(_weightsDescr), _weightsData.f, + convertToExtendedCkerShape(_biasDescr), _biasData.f, + convertToExtendedCkerShape(_outputDescr), _outputData.f); +} + +// executionMutex is used to protect concurrent access of non-threadsafe resources +// like gemmlowp::GemmContext. 
+void FullyConnectedLayer::fullyConnectedQuant8() +{ + float real_multiplier = 0.0; + int32_t output_multiplier = 0; + int32_t output_shift = 0; + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + GetQuantizedConvolutionMultiplier(_inputDescr, _weightsDescr, _biasDescr, _outputDescr, + &real_multiplier); + QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min, + &output_activation_max); + + nnfw::cker::FullyConnectedParams op_params; + op_params.input_offset = -_inputDescr.offset; + op_params.weights_offset = -_weightsDescr.offset; + op_params.output_offset = _outputDescr.offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + nnfw::cker::FullyConnected(op_params, convertToExtendedCkerShape(_inputDescr), _inputData.u8, + convertToExtendedCkerShape(_weightsDescr), _weightsData.u8, + convertToExtendedCkerShape(_biasDescr), _biasData.i32, + convertToExtendedCkerShape(_outputDescr), _outputData.u8); +} + +void FullyConnectedLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, + uint8_t *weightsData, const TensorDescriptor weightsDescr, + uint8_t *biasData, const TensorDescriptor biasDescr, + ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _weightsData.u8 = weightsData; + _weightsDescr = weightsDescr; + _biasData.u8 = biasData; + _biasDescr = biasDescr; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void FullyConnectedLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + fullyConnectedFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + 
fullyConnectedQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h b/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h new file mode 100644 index 000000000..9fdc393a4 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/FullyConnectedLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class FullyConnectedLayer : public ::neurun::exec::IFunction +{ +public: + FullyConnectedLayer(); + +public: + void fullyConnectedFloat32(); + + void fullyConnectedQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *weightsData, + const TensorDescriptor weightsDescr, uint8_t *biasData, + const TensorDescriptor biasDescr, ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _weightsData; + DataPtr _biasData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _weightsDescr; + TensorDescriptor _biasDescr; + TensorDescriptor _outputDescr; + + ir::Activation _activation; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_FULLYCONNECTEDLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/GatherLayer.cc b/runtime/neurun/backend/cpu/kernel/GatherLayer.cc new file mode 100644 index 000000000..b29acba79 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/GatherLayer.cc @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "GatherLayer.h" + +#include <cker/operation/Gather.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +void GatherLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr, + uint8_t *indicesData, const TensorDescriptor &indicesDescr, + uint8_t *outputData, const TensorDescriptor &outputDescr, int32_t axis) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _indicesData.u8 = indicesData; + _indicesDescr = indicesDescr; + _axis = axis; + _inputType = inputDescr.type; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void GatherLayer::run() +{ + nnfw::cker::GatherParams op_params; + op_params.axis = _axis; + + switch (_inputType) + { + case OperandType::FLOAT32: + nnfw::cker::Gather<float>(op_params, convertTensorDescriptorToCkerShape(_inputDescr), + _inputData.f, convertTensorDescriptorToCkerShape(_indicesDescr), + _indicesData.i32, convertTensorDescriptorToCkerShape(_outputDescr), + _outputData.f); + break; + case OperandType::QUANT8_ASYMM: + nnfw::cker::Gather<uint8_t>(op_params, convertTensorDescriptorToCkerShape(_inputDescr), + _inputData.u8, convertTensorDescriptorToCkerShape(_indicesDescr), + _indicesData.i32, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8); + break; + case OperandType::INT32: + nnfw::cker::Gather<int32_t>( + op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.i32, + convertTensorDescriptorToCkerShape(_indicesDescr), _indicesData.i32, + 
convertTensorDescriptorToCkerShape(_outputDescr), _outputData.i32); + break; + default: + throw std::runtime_error("Gather NYI for this operand type!"); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/GatherLayer.h b/runtime/neurun/backend/cpu/kernel/GatherLayer.h new file mode 100644 index 000000000..af4f8b8f6 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/GatherLayer.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class GatherLayer : public ::neurun::exec::IFunction +{ +public: + GatherLayer() + : _inputData{nullptr}, _indicesData{nullptr}, _outputData{nullptr}, _axis{-1}, + _inputType{OperandType::FLOAT32} + { + // DO NOTHING + } + +public: + void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *indicesData, + const TensorDescriptor &indicesDescr, uint8_t *outputData, + const TensorDescriptor &outputDescr, int32_t axis); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _indicesData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _indicesDescr; + TensorDescriptor _outputDescr; + + int32_t _axis; + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_GATHERLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc b/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc new file mode 100644 index 000000000..d9916964e --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/LogisticLayer.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "LogisticLayer.h" + +#include <cker/operation/Logistic.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +LogisticLayer::LogisticLayer() + : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void LogisticLayer::logisticFloat32() +{ + nnfw::cker::Logistic(convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f); +} + +void LogisticLayer::logisticQuant8() +{ + // cker quant8 logistic is not implemented yet + throw std::runtime_error{"NYI"}; +} + +void LogisticLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr, + uint8_t *outputData, const TensorDescriptor &outputDescr) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void LogisticLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + logisticFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + logisticQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/LogisticLayer.h b/runtime/neurun/backend/cpu/kernel/LogisticLayer.h new file mode 100644 index 000000000..33fcd6fed --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/LogisticLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class LogisticLayer : public ::neurun::exec::IFunction +{ +public: + LogisticLayer(); + +public: + void logisticFloat32(); + + void logisticQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *outputData, + const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_LOGISTICLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc new file mode 100644 index 000000000..095cd6d1d --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MaxPoolLayer.h" + +#include <cker/operation/MaxPool.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +#define MAXPOOLING_PARAMETERS /* paddings assigned uncast: (int8_t) wrapped values above 127 */ \ + nnfw::cker::PoolParams op_params; \ + op_params.stride_height = _strideHeight; \ + op_params.stride_width = _strideWidth; \ + op_params.filter_height = _kernelHeight; \ + op_params.filter_width = _kernelWidth; \ + op_params.padding_values.height = _paddingTop; \ + op_params.padding_values.width = _paddingLeft; + +MaxPoolLayer::MaxPoolLayer() + : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _paddingLeft(0), _paddingTop(0), + _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), + _kernelHeight(0), _activation(ir::Activation::NONE), _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void MaxPoolLayer::maxPoolFloat32() +{ + MAXPOOLING_PARAMETERS + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + nnfw::cker::MaxPool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f); +} +void
MaxPoolLayer::maxPoolQuant8() +{ + MAXPOOLING_PARAMETERS + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + nnfw::cker::MaxPool(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.u8, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.u8); +} + +void MaxPoolLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, + const uint32_t paddingLeft, const uint32_t paddingRight, + const uint32_t paddingTop, const uint32_t paddingBottom, + const uint32_t strideWidth, const uint32_t strideHeight, + const uint32_t kernelWidth, const uint32_t kernelHeight, + const ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr) +{ + _inputData.u8 = inputData; + + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _paddingLeft = paddingLeft; + _paddingRight = paddingRight; + _paddingTop = paddingTop; + _paddingBottom = paddingBottom; + _strideWidth = strideWidth; + _strideHeight = strideHeight; + _kernelWidth = kernelWidth; + _kernelHeight = kernelHeight; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void MaxPoolLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + maxPoolFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + maxPoolQuant8(); + } +} + +#undef MAXPOOLING_PARAMETERS + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h new file mode 100644 index 000000000..88a574c42 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/MaxPoolLayer.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class MaxPoolLayer : public ::neurun::exec::IFunction +{ +public: + MaxPoolLayer(); + +public: + void maxPoolFloat32(); + + void maxPoolQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor inputDescr, const uint32_t paddingLeft, + const uint32_t paddingRight, const uint32_t paddingTop, + const uint32_t paddingBottom, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const ir::Activation activation, uint8_t *outputData, + const TensorDescriptor outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; + + uint32_t _paddingLeft; + uint32_t _paddingTop; + uint32_t _paddingRight; + uint32_t _paddingBottom; + + uint32_t _strideWidth; + uint32_t _strideHeight; + uint32_t _kernelWidth; + uint32_t _kernelHeight; + + ir::Activation _activation; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} 
// namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_MAXPOOLLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/MulLayer.cc b/runtime/neurun/backend/cpu/kernel/MulLayer.cc new file mode 100644 index 000000000..d6ce2cfad --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/MulLayer.cc @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "MulLayer.h" + +#include <cker/operation/BinaryArithmeticOps.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +void MulLayer::mulFloat32() +{ + float output_activation_min, output_activation_max; + CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); + nnfw::cker::BinaryArithmeticOpParam op_params; + op_params.float_activation_max = output_activation_max; + op_params.float_activation_min = output_activation_min; + + const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) { + return a * b; + }; + + if (!HaveSameShapes(&_lhsDescr, &_rhsDescr)) + { + nnfw::cker::BroadcastBinaryArithmeticOpSlow( + op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f, + convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr), + _outputData.f, fn); + return; + } + + nnfw::cker::BinaryArithmeticOp(op_params, 
convertTensorDescriptorToCkerShape(_lhsDescr), + _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr), + _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr), + _outputData.f, fn); +} + +void MulLayer::mulQuant8() +{ + int32_t output_activation_min, output_activation_max; + CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min, + &output_activation_max); + // nnfw::cker::BinaryArithmeticOpParam op_params; + // op_params.quantized_activation_max = output_activation_max; + // op_params.quantized_activation_min = output_activation_min; + + // cker quant8 mul is not implemented yet + throw std::runtime_error{"Mul NYI for quantized"}; +} + +void MulLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData, + const TensorDescriptor &rhsDescr, const ir::Activation activation, + uint8_t *outputData, const TensorDescriptor &outputDescr) +{ + _lhsData.u8 = lhsData; + _lhsDescr = lhsDescr; + _rhsData.u8 = rhsData; + _rhsDescr = rhsDescr; + _inputType = lhsDescr.type; + _activation = activation; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void MulLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + mulFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + mulQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/MulLayer.h b/runtime/neurun/backend/cpu/kernel/MulLayer.h new file mode 100644 index 000000000..05fc3052f --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/MulLayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class MulLayer : public ::neurun::exec::IFunction +{ +public: + MulLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr() + { + // DO NOTHING + } + +public: + void mulFloat32(); + + void mulQuant8(); + + void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData, + const TensorDescriptor &rhsDescr, const ir::Activation activation, + uint8_t *outputData, const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _lhsData; + DataPtr _rhsData; + DataPtr _outputData; + + TensorDescriptor _lhsDescr; + TensorDescriptor _rhsDescr; + TensorDescriptor _outputDescr; + + ir::Activation _activation{ir::Activation::NONE}; + + OperandType _inputType{OperandType::FLOAT32}; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_MULLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/OperationUtils.cc b/runtime/neurun/backend/cpu/kernel/OperationUtils.cc new file mode 100644 index 000000000..8aa15dcbd --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/OperationUtils.cc @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2018 
Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "OperationUtils.h" + +#include <cmath> +#include <algorithm> +#include <cassert> + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +uint32_t getNumberOfDimensions(const TensorDescriptor &descr) { return descr.dimensions.size(); } + +uint32_t getNumberOfElements(const TensorDescriptor &descr) +{ + uint32_t count = 1; + for (size_t i = 0; i < descr.dimensions.size(); i++) + { + count *= descr.dimensions[i]; + } + return count; +} + +uint32_t getSizeOfDimension(const TensorDescriptor &descr, uint32_t dimensionIdx) +{ + if (dimensionIdx >= descr.dimensions.size()) + { + // TODO, log the error + return 0; + } + return descr.dimensions[dimensionIdx]; +} + +void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift) +{ + if (double_multiplier == 0.) 
+ { + *quantized_multiplier = 0; + *shift = 0; + return; + } + const double q = std::frexp(double_multiplier, shift); + auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31))); + + assert(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) + { + q_fixed /= 2; + ++*shift; + } + assert(q_fixed <= std::numeric_limits<int32_t>::max()); + *quantized_multiplier = static_cast<int32_t>(q_fixed); +} + +void GetQuantizedConvolutionMultiplier(const TensorDescriptor &inputDescr, + const TensorDescriptor &filterDescr, + const TensorDescriptor &biasDescr, + const TensorDescriptor &outputDescr, float *multiplier) +{ + const float input_product_scale = inputDescr.scale * filterDescr.scale; + const float bias_scale = biasDescr.scale; + const float output_scale = outputDescr.scale; + // The following conditions must be guaranteed by the training pipeline. + UNUSED_RELEASE(bias_scale); + assert(std::abs(input_product_scale - bias_scale) <= + 1e-6 * std::min(input_product_scale, bias_scale)); + assert(input_product_scale >= 0); + assert(input_product_scale < output_scale); + *multiplier = input_product_scale / output_scale; +} + +void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier, + int *left_shift) +{ + assert(double_multiplier > 1.); + const double q = std::frexp(double_multiplier, left_shift); + int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31))); + assert(q_fixed <= (1ll << 31)); + if (q_fixed == (1ll << 31)) + { + q_fixed /= 2; + ++*left_shift; + } + assert(*left_shift >= 0); + assert(q_fixed <= std::numeric_limits<int32_t>::max()); + *quantized_multiplier = static_cast<int32_t>(q_fixed); +} + +void CalculateActivationRangeFloat(ir::Activation activation, float *activation_min, + float *activation_max) +{ + if (activation == ir::Activation::RELU) + { + *activation_min = 0.f; + *activation_max = std::numeric_limits<float>::max(); + } + else if (activation == ir::Activation::RELU6) + { + *activation_min = 0.f; + 
*activation_max = 6.f; + } + else if (activation == ir::Activation::RELU1) + { + *activation_min = -1.f; + *activation_max = 1.f; + } + else if (activation == ir::Activation::SIGMOID) + { + *activation_min = 0.f; + *activation_max = 1.f; + } + else if (activation == ir::Activation::NONE) + { + *activation_min = std::numeric_limits<float>::lowest(); + *activation_max = std::numeric_limits<float>::max(); + } + else + { + std::cout << "Unsupported fused activation function." << std::endl; + } +} + +void CalculateActivationRangeUint8(ir::Activation activation, const TensorDescriptor &outputDescr, + int32_t *act_min, int32_t *act_max) +{ + const int32_t qmin = std::numeric_limits<uint8_t>::min(); + const int32_t qmax = std::numeric_limits<uint8_t>::max(); + const auto scale = outputDescr.scale; + const auto zero_point = outputDescr.offset; + auto quantize = [scale, zero_point](float f) { + return zero_point + static_cast<int32_t>(std::round(f / scale)); + }; + if (activation == ir::Activation::RELU) + { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = qmax; + } + else if (activation == ir::Activation::RELU6) + { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = std::min(qmax, quantize(6.0)); + } + else if (activation == ir::Activation::RELU1) + { + *act_min = std::max(qmin, quantize(-1.0)); + *act_max = std::min(qmax, quantize(1.0)); + } + else if (activation == ir::Activation::SIGMOID) + { + *act_min = std::max(qmin, quantize(0.0)); + *act_max = std::min(qmax, quantize(1.0)); + } + else if (activation == ir::Activation::NONE) + { + *act_min = qmin; + *act_max = qmax; + } + else + { + std::cout << "Unsupported fused activation function." 
<< std::endl; + } +} + +bool HaveSameShapes(const TensorDescriptor *input1, const TensorDescriptor *input2) +{ + if (input1 == input2) + return true; + if (input2 == NULL || input2 == NULL) + return false; + + if (input1 == NULL) + { + return (getNumberOfDimensions(*input2) == 0); + } + + if (getNumberOfDimensions(*input1) != getNumberOfDimensions(*input2)) + return false; + + for (uint32_t i = 0; i < getNumberOfDimensions(*input1); i++) + if (input1->dimensions[i] != input2->dimensions[i]) + return false; + + return true; +} + +int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) +{ + const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) * + (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift); + // Tighten bound using floor. Suppose that we could use the exact value. + // After scaling the difference, the result would be at the maximum. Thus we + // must ensure that our value has lower magnitude. + return static_cast<int32_t>(std::floor(max_input_rescaled)); +} + +TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout) +{ + TensorDescriptor descriptor; + + auto dims = o.shape().dims(); + if (frontend_layout == ir::Layout::NCHW && o.shape().rank() == 4) + { + // NCHW -> NHWC + uint32_t permutation[4] = {0, 2, 3, 1}; + for (int i = 0; i < o.shape().rank(); ++i) + { + dims.at(i) = o.shape().dim(permutation[i]); + } + } + descriptor.dimensions = std::vector<uint32_t>(dims.begin(), dims.end()); + descriptor.type = static_cast<OperandType>(static_cast<int32_t>(o.typeInfo().type())); + descriptor.scale = o.typeInfo().scale(); + descriptor.offset = o.typeInfo().offset(); + + // CPU backend assume that neurun internal shape's rank is always same or less than 4 + assert(descriptor.dimensions.size() <= 4); + + return descriptor; +} + +uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions) +{ + uint32_t size = 4; + + switch (type) + { + case OperandType::FLOAT32: + case 
OperandType::INT32: + case OperandType::UINT32: + size = 4; + break; + case OperandType::BOOL8: + case OperandType::QUANT8_ASYMM: + case OperandType::QUANT8_SYMM: + size = 1; + break; + default: + throw std::runtime_error("Not supported operand type."); + break; + } + + for (auto d : dimensions) + { + size *= d; + } + + return size; +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/OperationUtils.h b/runtime/neurun/backend/cpu/kernel/OperationUtils.h new file mode 100644 index 000000000..b9e8c8974 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/OperationUtils.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ +#define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ + +#include <iostream> +#include <limits> +#include <vector> + +#include <cker/Shape.h> + +#include "ir/Operand.h" +#include "ir/DataType.h" +#include <ir/InternalType.h> + +using OperandType = neurun::ir::DataType; + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +struct TensorDescriptor +{ + OperandType type; + std::vector<uint32_t> dimensions; + float scale; + int32_t offset; +}; + +union DataPtr { + uint8_t *u8; + int8_t *i8; + int32_t *i32; + float *f; + void *v; +}; + +uint32_t getNumberOfDimensions(const TensorDescriptor &descr); + +uint32_t getNumberOfElements(const TensorDescriptor &descr); + +uint32_t getSizeOfDimension(const TensorDescriptor &descr, uint32_t dimensionIdx); + +inline nnfw::cker::Shape convertToExtendedCkerShape(const TensorDescriptor &descr) +{ + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + uint32_t src = 4 - descr.dimensions.size(); + for (uint32_t i = 0; i < 4; ++i) + { + if (i < src) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = descr.dimensions[i - src]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + +inline nnfw::cker::Shape convertTensorDescriptorToCkerShape(const TensorDescriptor &descr) +{ + std::vector<int32_t> raw_shape; + raw_shape.resize(4); + + for (uint32_t i = 0; i < 4; ++i) + { + if (i >= descr.dimensions.size()) + { + raw_shape[i] = 1; + } + else + { + raw_shape[i] = descr.dimensions[i]; + } + } + + return nnfw::cker::GetShape(raw_shape); +} + +inline int32_t getAxis(uint32_t rank, int32_t axis, ir::Layout frontend_layout) +{ + auto ret = axis; + + if (axis < 0) + { + ret += rank; + } + + // NCHW -> NHWC + if (frontend_layout == ir::Layout::NCHW) + { + int32_t permutation[4] = {0, 3, 1, 2}; + ret = permutation[ret]; + } + + return ret; +} + +void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift); + +void 
GetQuantizedConvolutionMultiplier(const TensorDescriptor &inputDescr, + const TensorDescriptor &filterDescr, + const TensorDescriptor &biasDescr, + const TensorDescriptor &outputDescr, float *multiplier); + +void QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier, + int *left_shift); + +void CalculateActivationRangeFloat(ir::Activation activation, float *activation_min, + float *activation_max); + +void CalculateActivationRangeUint8(ir::Activation activation, const TensorDescriptor &outputDescr, + int32_t *act_min, int32_t *act_max); + +bool HaveSameShapes(const TensorDescriptor *input1, const TensorDescriptor *input2); + +int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift); + +TensorDescriptor getTensorDescriptor(const ir::Operand &o, ir::Layout frontend_layout); + +uint32_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions); + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ diff --git a/runtime/neurun/backend/cpu/kernel/PadLayer.cc b/runtime/neurun/backend/cpu/kernel/PadLayer.cc new file mode 100644 index 000000000..1fd9429b5 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/PadLayer.cc @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "PadLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Pad.h> + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +PadLayer::PadLayer() + : _inputData(), _outputData(), _inputDescr(), _outputDescr(), _padData(), _padRank(), + _constantValueData(), _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +void PadLayer::padFloat32() +{ + nnfw::cker::Pad(_padData, _padRank, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f, + _constantValueData.f); +} +void PadLayer::padQuant8() { throw std::runtime_error("Quantized Pad isn't supported NYI"); } + +void PadLayer::configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *outputData, + const TensorDescriptor outputDescr, const int32_t *padData, + int32_t padRank, uint8_t *constantValueData) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _outputData.u8 = outputData; + _outputDescr = outputDescr; + _padData = padData; + _padRank = padRank; + _constantValueData.u8 = constantValueData; +} + +void PadLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + padFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + padQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/PadLayer.h b/runtime/neurun/backend/cpu/kernel/PadLayer.h new file mode 100644 index 000000000..f4413a8ed --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/PadLayer.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +// Note, this is pad with mode=`CONSTANT`: it doesn't support `REFLECT` and `SYMMETRIC` +class PadLayer : public ::neurun::exec::IFunction +{ +public: + PadLayer(); + +public: + void padFloat32(); + + void padQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor inputDescr, uint8_t *outputData, + const TensorDescriptor outputDescr, const int32_t *padData, int32_t padRank, + uint8_t *constantValueData = nullptr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; + + const int32_t *_padData; + int32_t _padRank; + DataPtr _constantValueData; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_PADLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc b/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc new file mode 100644 index 000000000..6f28d8436 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/PermuteLayer.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "PermuteLayer.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +using Type = ir::operation::Permute::Type; + +void PermuteLayer::configure(std::shared_ptr<backend::operand::ITensor> input, + std::shared_ptr<backend::operand::ITensor> output, + const ir::Shape &output_shape, Type type, ir::DataType dataType) +{ + _input = input; + _output = output; + _output_shape = output_shape; + _type = type; + _dataType = dataType; +} + +void PermuteLayer::run() +{ + using ir::DataType; + switch (_dataType) + { + case DataType::FLOAT32: + runTempl<float>(); + break; + case DataType::INT32: + runTempl<int32_t>(); + break; + case DataType::UINT32: + runTempl<uint32_t>(); + break; + case DataType::BOOL8: + case DataType::QUANT8_ASYMM: + runTempl<uint8_t>(); + break; + case DataType::QUANT8_SYMM: + runTempl<int8_t>(); + break; + default: + throw std::runtime_error("NYI"); + break; + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/PermuteLayer.h b/runtime/neurun/backend/cpu/kernel/PermuteLayer.h new file mode 100644 index 000000000..1f9110807 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/PermuteLayer.h @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__ + +#include <exec/IFunction.h> + +#include "util/feature/nhwc/View.h" +#include "OperationUtils.h" +#include "ir/operation/Permute.h" +#include "util/feature/nhwc/Reader.h" +#include "util/feature/nchw/View.h" +#include "util/Coordinates.h" + +#include <misc/feature/IndexIterator.h> +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class PermuteLayer : public ::neurun::exec::IFunction +{ +public: + PermuteLayer() = default; + +public: + void configure(std::shared_ptr<backend::operand::ITensor> input, + std::shared_ptr<backend::operand::ITensor> output, const ir::Shape &output_shape, + ir::operation::Permute::Type type, ir::DataType dataType); + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + template <class T> void runTempl() + { + auto rank = _output_shape.rank(); + auto fn = [&](::neurun::backend::operand::ITensor &in_tensor) { + _output->access([&](::neurun::backend::operand::ITensor &out_tensor) { + auto input_buffer = in_tensor.buffer(); + auto input_size = in_tensor.total_size(); + auto output_buffer = out_tensor.buffer(); + if (_type == ir::operation::Permute::Type::COPY) + { + assert(in_tensor.layout() == 
out_tensor.layout()); + if (!in_tensor.has_padding() && !out_tensor.has_padding()) + { + assert(input_size == out_tensor.total_size()); + memcpy(output_buffer, input_buffer, input_size); + return; + } + } + switch (rank) + { + case 0: + case 1: + { + const int32_t copy_len = _output_shape.dim(0); + + memcpy(output_buffer, input_buffer, copy_len); + break; + } + case 2: + { + const int32_t copy_len = _output_shape.dim(1); + + for (auto i = 0; i < _output_shape.dim(0); ++i) + { + neurun::util::Coordinates coords{i, 0}; + memcpy(output_buffer + out_tensor.calcOffset(coords), + input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T)); + } + break; + } + case 3: + { + const int32_t copy_len = _output_shape.dim(2); + + for (auto i = 0; i < _output_shape.dim(0); ++i) + { + for (auto j = 0; j < _output_shape.dim(1); ++j) + { + neurun::util::Coordinates coords{i, j, 0}; + memcpy(output_buffer + out_tensor.calcOffset(coords), + input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T)); + } + } + break; + } + case 4: + { + // TODO Unify permute type and remove switch case + switch (_type) + { + case ir::operation::Permute::Type::NHWC_TO_NCHW: + { + for (auto n = 0; n < _output_shape.dim(0); ++n) + { + for (auto c = 0; c < _output_shape.dim(1); ++c) + { + for (auto h = 0; h < _output_shape.dim(2); ++h) + { + for (auto w = 0; w < _output_shape.dim(3); ++w) + { + const neurun::util::Coordinates in_coords{n, h, w, c}; + const auto out_coords = + convertCoordinates(in_coords, in_tensor.layout(), out_tensor.layout()); + const auto value = + *reinterpret_cast<T *>(input_buffer + in_tensor.calcOffset(in_coords)); + *reinterpret_cast<T *>(output_buffer + out_tensor.calcOffset(out_coords)) = + value; + } + } + } + } + break; + } + case ir::operation::Permute::Type::NCHW_TO_NHWC: + { + for (auto n = 0; n < _output_shape.dim(0); ++n) + { + for (auto h = 0; h < _output_shape.dim(1); ++h) + { + for (auto w = 0; w < _output_shape.dim(2); ++w) + { + for (auto c = 0; c < 
_output_shape.dim(3); ++c) + { + const neurun::util::Coordinates in_coords{n, c, h, w}; + const auto out_coords = + convertCoordinates(in_coords, in_tensor.layout(), out_tensor.layout()); + const auto value = + *reinterpret_cast<T *>(input_buffer + in_tensor.calcOffset(in_coords)); + *reinterpret_cast<T *>(output_buffer + out_tensor.calcOffset(out_coords)) = + value; + } + } + } + } + break; + } + case ir::operation::Permute::Type::COPY: + { + const int32_t copy_len = _output_shape.dim(3); + + for (auto i = 0; i < _output_shape.dim(0); ++i) + { + for (auto j = 0; j < _output_shape.dim(1); ++j) + { + for (auto k = 0; k < _output_shape.dim(2); ++k) + { + neurun::util::Coordinates coords{i, j, k, 0}; + memcpy(output_buffer + out_tensor.calcOffset(coords), + input_buffer + in_tensor.calcOffset(coords), copy_len * sizeof(T)); + } + } + } + break; + } + default: + throw std::runtime_error("NYI"); + break; + } + break; + } + default: + throw std::runtime_error("NYI"); + break; + } + }); + }; + _input->access(fn); + } + +private: + std::shared_ptr<backend::operand::ITensor> _input{nullptr}; + std::shared_ptr<backend::operand::ITensor> _output{nullptr}; + ir::Shape _output_shape{}; + ir::operation::Permute::Type _type{ir::operation::Permute::Type::COPY}; + ir::DataType _dataType{ir::DataType::FLOAT32}; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_PERMUTE_LAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc new file mode 100644 index 000000000..caeee9f12 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ReshapeLayer.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +ReshapeLayer::ReshapeLayer() : _inputData(), _outputData(), _inputDescr(), _outputDescr() +{ + // DO NOTHING +} + +void ReshapeLayer::reshapeGeneric() +{ + size_t count = sizeOfData(_inputDescr.type, _inputDescr.dimensions); + memcpy(_outputData.v, _inputData.v, count); +} + +void ReshapeLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr, + uint8_t *outputData, const TensorDescriptor &outputDescr) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _outputData.u8 = outputData; + _outputDescr = outputDescr; +} + +void ReshapeLayer::run() { reshapeGeneric(); } + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h new file mode 100644 index 000000000..25dd851b2 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/ReshapeLayer.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class ReshapeLayer : public ::neurun::exec::IFunction +{ +public: + ReshapeLayer(); + +public: + void reshapeGeneric(); + + void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, uint8_t *outputData, + const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_RESHAPELAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc new file mode 100644 index 000000000..58ba109b4 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "SoftMaxLayer.h" + +#include <cker/operation/SoftMax.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +SoftMaxLayer::SoftMaxLayer() + : _inputData(), _outputData(), _beta(0.0), _inputDescr(), _outputDescr(), + _inputType(OperandType::FLOAT32) +{ + // DO NOTHING +} + +// Performs softmax along the input of size (input_size * batch_size). +void Softmax(const float *in, const int input_size, const int batch_size, const float beta, + float *out) +{ + assert(input_size > 0); + + // For each batch + for (int b = 0; b < batch_size; b++) + { + // Find the max coeff. + float max_coeff = in[0]; + for (int i = 1; i < input_size; i++) + { + if (in[i] > max_coeff) + max_coeff = in[i]; + } + + // Compute the normalized sum of exps. + float exp_sum = 0.0; + for (int i = 0; i < input_size; i++) + { + out[i] = std::exp((in[i] - max_coeff) * beta); + exp_sum += out[i]; + } + + // Divide by the sum of exps. + float reciprocal_sum_exp = 1.f / exp_sum; + for (int i = 0; i < input_size; i++) + { + out[i] *= reciprocal_sum_exp; + } + + // Advance in and out pointers for the next batch. 
+ in += input_size; + out += input_size; + } +} + +void SoftMaxLayer::softmaxFloat32() +{ + TensorDescriptor descrIn4D; + + if (getNumberOfDimensions(_inputDescr) == 2) + { + uint32_t batch_size = getSizeOfDimension(_inputDescr, 0); + if (batch_size == 0) + throw std::runtime_error("batch_size should not be 0"); + + uint32_t input_size = getNumberOfElements(_inputDescr) / batch_size; + Softmax(_inputData.f, input_size, batch_size, _beta, _outputData.f); + } + else if (getNumberOfDimensions(_inputDescr) == 4) + { + nnfw::cker::SoftmaxParams op_params; + op_params.beta = _beta; + nnfw::cker::Softmax(op_params, convertTensorDescriptorToCkerShape(_inputDescr), _inputData.f, + convertTensorDescriptorToCkerShape(_outputDescr), _outputData.f); + } + else + { + throw std::runtime_error{"only 2D and 4D tensors supported"}; + } +} + +void SoftMaxLayer::softmaxQuant8() +{ + TensorDescriptor descrIn4D = _inputDescr; + + if (getNumberOfDimensions(_inputDescr) == 2) + { + uint32_t batch_size = getSizeOfDimension(_inputDescr, 0); + if (batch_size == 0) + throw std::runtime_error("batch_size should not be 0"); + + uint32_t input_size = getNumberOfElements(_inputDescr) / batch_size; + descrIn4D.dimensions = {batch_size, 1, 1, input_size}; + } + else if (getNumberOfDimensions(_inputDescr) == 4) + { + descrIn4D = _inputDescr; + } + else + { + throw std::runtime_error{"only 2D and 4D tensors supported"}; + } + if (_outputDescr.offset != 0 || _outputDescr.scale != 1.f / 256) + { + throw std::runtime_error{"incorrect scale / offset for output"}; + } + static const int32_t kScaledDiffIntegerBits = 5; + const double input_beta_real_multiplier = std::min( + 1.0 * _beta * _inputDescr.scale * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0); + int32_t input_multiplier = 0; + int32_t input_left_shift = 0; + QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier, + &input_left_shift); + float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, 
input_left_shift); + + nnfw::cker::SoftmaxParams op_params; + op_params.input_multiplier = input_multiplier; + op_params.input_left_shift = input_left_shift; + op_params.diff_min = diff_min; + nnfw::cker::Softmax(op_params, convertTensorDescriptorToCkerShape(descrIn4D), _inputData.u8, + convertTensorDescriptorToCkerShape(descrIn4D), _outputData.u8); +} + +void SoftMaxLayer::configure(uint8_t *inputData, const TensorDescriptor &inputDescr, + const float beta, uint8_t *outputData, + const TensorDescriptor &outputDescr) +{ + _inputData.u8 = inputData; + _inputDescr = inputDescr; + _inputType = inputDescr.type; + _outputData.u8 = outputData; + _outputDescr = outputDescr; + _beta = beta; +} + +void SoftMaxLayer::run() +{ + if (_inputType == OperandType::FLOAT32) + { + softmaxFloat32(); + } + else if (_inputType == OperandType::QUANT8_ASYMM) + { + softmaxQuant8(); + } +} + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun diff --git a/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h new file mode 100644 index 000000000..4723afb72 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/SoftMaxLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__ +#define __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__ + +#include <exec/IFunction.h> + +#include "OperationUtils.h" + +namespace neurun +{ +namespace backend +{ +namespace cpu +{ +namespace kernel +{ + +class SoftMaxLayer : public ::neurun::exec::IFunction +{ +public: + SoftMaxLayer(); + +public: + void softmaxFloat32(); + + void softmaxQuant8(); + + void configure(uint8_t *inputData, const TensorDescriptor &inputDescr, const float beta, + uint8_t *outputData, const TensorDescriptor &outputDescr); + + void run(); + void runSync() + { + // this abstract method is used just for profiling and called for + // backend::acl_common::AclFunction + run(); + } + +private: + DataPtr _inputData; + DataPtr _outputData; + + float _beta; + + TensorDescriptor _inputDescr; + TensorDescriptor _outputDescr; + + OperandType _inputType; +}; + +} // namespace kernel +} // namespace cpu +} // namespace backend +} // namespace neurun + +#endif // __NEURUN_BACKEND_CPU_KERNEL_SOFTMAXLAYER_H__ diff --git a/runtime/neurun/backend/cpu/kernel/SubLayer.cc b/runtime/neurun/backend/cpu/kernel/SubLayer.cc new file mode 100644 index 000000000..c6f7188e0 --- /dev/null +++ b/runtime/neurun/backend/cpu/kernel/SubLayer.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "SubLayer.h"
+
+#include <functional>
+#include <stdexcept>
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+/**
+ * @brief Element-wise float32 subtraction (lhs - rhs)
+ *
+ * The activation range derived from _activation is handed to cker through op_params.
+ * When HaveSameShapes() reports that the operand descriptors differ, the broadcasting cker
+ * kernel is used; otherwise the plain element-wise kernel runs.
+ */
+void SubLayer::subFloat32()
+{
+  float output_activation_min, output_activation_max;
+  CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+  nnfw::cker::BinaryArithmeticOpParam op_params;
+  op_params.float_activation_max = output_activation_max;
+  op_params.float_activation_min = output_activation_min;
+  // Binary functor applied by cker to each pair of operand elements
+  const std::function<float(const float &, const float &)> fn = [](const float &a, const float &b) {
+    return a - b;
+  };
+
+  if (!HaveSameShapes(&_lhsDescr, &_rhsDescr))
+  {
+    // Operand shapes differ: go through the broadcasting kernel
+    nnfw::cker::BroadcastBinaryArithmeticOpSlow(
+        op_params, convertToExtendedCkerShape(_lhsDescr), _lhsData.f,
+        convertToExtendedCkerShape(_rhsDescr), _rhsData.f, convertToExtendedCkerShape(_outputDescr),
+        _outputData.f, fn);
+    return;
+  }
+
+  nnfw::cker::BinaryArithmeticOp(op_params, convertTensorDescriptorToCkerShape(_lhsDescr),
+                                 _lhsData.f, convertTensorDescriptorToCkerShape(_rhsDescr),
+                                 _rhsData.f, convertTensorDescriptorToCkerShape(_outputDescr),
+                                 _outputData.f, fn);
+}
+
+/**
+ * @brief Quantized 8-bit subtraction - not implemented
+ * @throws std::runtime_error always ("NYI")
+ */
+void SubLayer::subQuant8()
+{
+  // The activation range is computed but not consumed yet; it feeds the commented-out
+  // parameter setup below, kept for when cker gains a quant8 sub kernel
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(_activation, _outputDescr, &output_activation_min,
+                                &output_activation_max);
+  // nnfw::cker::SubParam op_params;
+  // op_params.quantized_activation_max = output_activation_max;
+  // op_params.quantized_activation_min = output_activation_min;
+
+  // cker quant8 sub is not implemented yet
+  throw std::runtime_error{"NYI"};
+}
+
+/**
+ * @brief Record operand buffers, descriptors and the fused activation for a later run()
+ *
+ * The type used for kernel dispatch is taken from lhsDescr.type only; the types of rhsDescr
+ * and outputDescr are not inspected here.
+ *
+ * @param lhsData     Buffer of the left-hand operand
+ * @param lhsDescr    Descriptor of the left-hand operand
+ * @param rhsData     Buffer of the right-hand operand
+ * @param rhsDescr    Descriptor of the right-hand operand
+ * @param activation  Fused activation kind
+ * @param outputData  Buffer of the output tensor
+ * @param outputDescr Descriptor of the output tensor
+ */
+void SubLayer::configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+                         const TensorDescriptor &rhsDescr, const ir::Activation activation,
+                         uint8_t *outputData, const TensorDescriptor &outputDescr)
+{
+  _lhsData.u8 = lhsData;
+  _lhsDescr = lhsDescr;
+  _rhsData.u8 = rhsData;
+  _rhsDescr = rhsDescr;
+  _inputType = lhsDescr.type;
+  _activation = activation;
+  _outputData.u8 = outputData;
+  _outputDescr = outputDescr;
+}
+
+/**
+ * @brief Run the kernel that matches the input type recorded by configure()
+ * @throws std::runtime_error if the input type is neither FLOAT32 nor QUANT8_ASYMM, or from
+ *         subQuant8() for QUANT8_ASYMM
+ */
+void SubLayer::run()
+{
+  if (_inputType == OperandType::FLOAT32)
+  {
+    subFloat32();
+  }
+  else if (_inputType == OperandType::QUANT8_ASYMM)
+  {
+    subQuant8();
+  }
+  else
+  {
+    // Fail loudly: returning silently would leave the output buffer unwritten
+    throw std::runtime_error{"Sub: unsupported data type"};
+  }
+}
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
diff --git a/runtime/neurun/backend/cpu/kernel/SubLayer.h b/runtime/neurun/backend/cpu/kernel/SubLayer.h
new file mode 100644
index 000000000..c9abdb48c
--- /dev/null
+++ b/runtime/neurun/backend/cpu/kernel/SubLayer.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__
+#define __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+namespace kernel
+{
+
+/**
+ * @brief CPU backend kernel object for the element-wise Sub operation
+ *
+ * Operands are bound with configure(); run() then executes the kernel matching the
+ * recorded input type.
+ */
+class SubLayer : public ::neurun::exec::IFunction
+{
+public:
+  /** @brief Value-initialize all buffer handles and descriptors */
+  SubLayer() : _lhsData(), _rhsData(), _outputData(), _lhsDescr(), _rhsDescr(), _outputDescr()
+  {
+    // DO NOTHING
+  }
+
+public:
+  /** @brief Sub kernel entry for float32 tensors */
+  void subFloat32();
+
+  /** @brief Sub kernel entry for 8-bit quantized tensors */
+  void subQuant8();
+
+  /**
+   * @brief Bind operand buffers, descriptors and the fused activation to this kernel
+   * @param lhsData     Buffer of the left-hand operand
+   * @param lhsDescr    Descriptor of the left-hand operand
+   * @param rhsData     Buffer of the right-hand operand
+   * @param rhsDescr    Descriptor of the right-hand operand
+   * @param activation  Fused activation kind
+   * @param outputData  Buffer of the output tensor
+   * @param outputDescr Descriptor of the output tensor
+   */
+  void configure(uint8_t *lhsData, const TensorDescriptor &lhsDescr, uint8_t *rhsData,
+                 const TensorDescriptor &rhsDescr, const ir::Activation activation,
+                 uint8_t *outputData, const TensorDescriptor &outputDescr);
+
+  /** @brief Execute the kernel (IFunction interface) */
+  void run() override;
+
+  /** @brief Synchronous execution entry; for this CPU kernel it simply forwards to run() */
+  void runSync() override
+  {
+    // this abstract method is used just for profiling and called for
+    // backend::acl_common::AclFunction
+    run();
+  }
+
+private:
+  DataPtr _lhsData;
+  DataPtr _rhsData;
+  DataPtr _outputData;
+
+  TensorDescriptor _lhsDescr;
+  TensorDescriptor _rhsDescr;
+  TensorDescriptor _outputDescr;
+
+  ir::Activation _activation{ir::Activation::NONE};
+
+  OperandType _inputType{OperandType::FLOAT32};
+};
+
+} // namespace kernel
+} // namespace cpu
+} // namespace backend
+} // namespace neurun
+
+#endif // __NEURUN_BACKEND_CPU_KERNEL_SUBLAYER_H__ |