commit    74476a2d0296bdad70a2f7f90bc7419a8b05bffd
tree      3f991636c1e9423d38eb16a384c20b569b0d678e  (runtime/onert/backend)
parent    042b262b3633b6c0f577aed6cb4b980ad0c1dcf3
author    Chunseok Lee <chunseok.lee@samsung.com>  2020-09-05 21:49:46 +0900
committer Chunseok Lee <chunseok.lee@samsung.com>  2020-09-05 21:49:46 +0900
Imported Upstream version 1.9.0
(tags: upstream/1.9.0, submit/tizen/20200905.125700, accepted/tizen/unified/20200906.032650)
Diffstat (limited to 'runtime/onert/backend')
92 files changed, 2926 insertions, 6106 deletions
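Two changes recur throughout the hunks below: tensors are now resolved through a shared acl_common::AclTensorRegistry (wired up as context->tensor_registry and held as _tensor_reg) instead of through the TensorBuilder, and ACL kernels are now built with the acl_common::generateLayer helper instead of a make_unique + configure pair, then wrapped with the backend-independent asAclFunction rather than asAclClFunction. A condensed before/after illustration of the pattern follows; the identifiers come from the diff, but the snippet itself is not verbatim commit code.

    // Before (1.8.x): tensors via TensorBuilder, kernels configured by hand.
    auto ofm_tensor = _tensor_builder->at(ofm_index).get();
    auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
    fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
    _return_fn = asAclClFunction(std::move(fn));

    // After (1.9.0): tensors via the shared tensor registry, kernels built in
    // one expression by acl_common::generateLayer, wrapped by asAclFunction.
    auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
    auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(ifm_tensor->handle(),
                                                                     ofm_tensor->handle());
    _return_fn = asAclFunction(std::move(fn));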
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 8aaf516cd..5c5041378 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -25,6 +25,7 @@
 #include "KernelGenerator.h"
 #include "TensorManager.h"
 #include "Optimizer.h"
+#include "AclTensorRegistry.h"
 
 namespace onert
 {
@@ -47,10 +48,13 @@ public:
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
     auto context = std::make_unique<BackendContext>(this, &graph);
-    auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+    auto tm = createTensorManager(is_linear_executor);
+    auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+    auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+    context->tensor_registry = tr;
     context->tensor_builder = tb;
-    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
-    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
     context->tensor_register = nullptr;
     context->optimizer = std::make_shared<Optimizer>(context.get());
     return context;
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index d7f5f8031..31f1c10eb 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -24,78 +24,17 @@ namespace acl_cl
 {
 
 ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
-                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
-    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+                                         const std::shared_ptr<ITensorRegistry> &tensor_reg)
+    : acl_common::AclConstantInitializer{operands, tensor_reg}
 {
   // DO NOTHING
 }
 
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
-  assert(node.getInputs().size() > index);
-
-  const auto &input_index = node.getInputs().at(index);
-  const auto &input_obj = _operands.at(input_index);
-  registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
-  assert(node.getInputs().size() > index);
-
-  const auto &input_index = node.getInputs().at(index);
-  const auto &input_obj = _operands.at(input_index);
-  registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
-  const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
-  const auto &block_size_obj = _operands.at(block_size_index);
-
-  if (block_size_obj.isConstant())
-  {
-    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
-      assert(model_obj.data());
-      const auto &shape = model_obj.shape();
-      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
-      assert(model_obj.shape().rank() == 1);
-      obj.access([&](ITensor &tensor) {
-        for (size_t i = 0; i < shape.num_elements(); ++i)
-        {
-          const int32_t value = base[shape.num_elements() - i - 1];
-          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
-                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
-          *into = value;
-        }
-      });
-    };
-  }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
-  permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
-  copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
-  permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
-  copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
 void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node)
 {
   copyInputInitialize(node, ir::operation::EmbeddingLookup::LOOKUPS);
 }
 
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
-  copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
-  copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
 void ConstantInitializer::visit(const ir::operation::Gather &node)
 {
   copyInputInitialize(node, ir::operation::Gather::INDICES);
@@ -107,33 +46,6 @@ void ConstantInitializer::visit(const ir::operation::HashtableLookup &node)
   copyInputInitialize(node, ir::operation::HashtableLookup::KEYS);
 }
 
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
-  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
-  copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
-  copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
-  copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
-  copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
 void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
 {
   const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
@@ -184,13 +96,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
   }
 }
 
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
-  const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
-  const auto &kernel_obj = _operands.at(kernel_index);
-  registerPermuteInitializer(kernel_index, kernel_obj);
-}
-
 } // namespace acl_cl
 } // namespace backend
 } // namespace onert
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index c51f72b11..4f894fd31 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -17,9 +17,7 @@
 #ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
 
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
 
 namespace onert
 {
@@ -28,32 +26,18 @@ namespace backend
 namespace acl_cl
 {
 
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
 {
 public:
   ConstantInitializer(const ir::Operands &operands,
-                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);
 
 public:
-  void visit(const ir::operation::BatchToSpaceND &) override;
-  void visit(const ir::operation::Conv2D &) override;
-  void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::EmbeddingLookup &) override;
-  void visit(const ir::operation::FullyConnected &) override;
-  void visit(const ir::operation::Gather &) override;
-  void visit(const ir::operation::HashtableLookup &) override;
-  void visit(const ir::operation::LSTM &) override;
-  void visit(const ir::operation::RNN &) override;
-  void visit(const ir::operation::SpaceToBatchND &) override;
-  void visit(const ir::operation::TransposeConv &) override;
-
-private:
-  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
-  void copyInputInitialize(const ir::Operation &node, uint32_t index);
-  void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
-  std::shared_ptr<TensorBuilder> _tensor_builder;
+  using acl_common::AclConstantInitializer::visit;
+  void visit(const ir::operation::EmbeddingLookup &) final;
+  void visit(const ir::operation::Gather &) final;
+  void visit(const ir::operation::HashtableLookup &) final;
+  void visit(const ir::operation::SpaceToBatchND &) final;
 };
 
 } // namespace acl_cl
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index a84f983b4..94489253d 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -40,15 +40,16 @@ namespace backend
 namespace acl_cl
 {
 
-using ::onert::backend::acl_common::asAclClFunction;
+using ::onert::backend::acl_common::asAclFunction;
 
 using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
-    ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>;
+    ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
 
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
-                                 const ir::Operations &operations_ctx,
-                                 const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+    const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+    const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
     : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
-      _current_op_seq_layout(ir::Layout::UNKNOWN)
+      _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
 {
   // DO NOTHING
 }
@@ -77,51 +78,69 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   const auto block_size_index{
       node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
 
   assert(_ctx.at(block_size_index).data());
 
-  auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
 
-  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  const auto activation = node.param().activation;
 
-  std::unique_ptr<::arm_compute::IFunction> fn;
-  if (ifm_tensor->data_type() == ofm_tensor->data_type())
-  {
-    auto l = std::make_unique<::arm_compute::CLCopy>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+  const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-    fn = std::move(l);
-  }
-  else
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().arithmetic_type)
   {
-    auto l = std::make_unique<::arm_compute::CLCast>();
-
-    // TODO Support converting float to int32 as round down
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
-    fn = std::move(l);
+    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+          arm_compute::ConvertPolicy::SATURATE, act_info);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+          arm_compute::ConvertPolicy::SATURATE, act_info);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+          arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+          act_info);
+      break;
+    }
+    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
+          lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
+      break;
+    }
+    default:
+      assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+      break;
   }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Conv2D &node)
@@ -145,22 +164,20 @@
                                             ker_width, ker_height);
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
 
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-  auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
-                ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
-                ::arm_compute::Size2D(1U, 1U), act_info);
+  auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+      ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+      ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
 
-  _return_fn = asAclClFunction(std::move(fn));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
@@ -185,50 +202,23 @@
   const auto multiplier = node.param().multiplier;
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
 
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
   {
-    auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
-
-    fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
-                  ofm_tensor->handle(), conv_info, multiplier, act_info);
+    auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
+        ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+        conv_info, multiplier, act_info);
 
-    _return_fn = asAclClFunction(std::move(fn));
+    _return_fn = asAclFunction(std::move(fn));
   }
 }
 
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
 void KernelGenerator::visit(const ir::operation::Concat &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -250,70 +240,44 @@
     return;
   }
 
-  auto output_tensor = _tensor_builder->at(ofm_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
   std::vector<::arm_compute::ICLTensor *> input_tensors;
   for (auto &ifm_ind : input_indexes)
-    input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+    input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
 
   std::unique_ptr<::arm_compute::IFunction> fn;
   if (input_indexes.size() < 2)
   {
-    auto l = std::make_unique<::arm_compute::CLCopy>();
-    l->configure(input_tensors.at(0), output_tensor->handle());
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0),
+                                                        output_tensor->handle());
  }
   else
   {
-    auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
     const auto rank = _ctx.at(ofm_index).shape().rank();
     const auto frontend_layout = _current_op_seq_layout;
     const auto backend_layout = output_tensor->layout();
     const auto fixed_axis =
         acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
-    l->configure(input_tensors, output_tensor->handle(), fixed_axis);
-    fn = std::move(l);
+    fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
+        input_tensors, output_tensor->handle(), fixed_axis);
   }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  auto output_tensor = _tensor_builder->at(output_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
   const auto activation = node.param().activation;
 
-  auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+  auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
                                                 ::arm_compute::CLFullyConnectedReshapingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout);
+      node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
   _return_fn = std::make_unique<exec::FunctionSequence>(
       std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
 }
 
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
-                arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
 void KernelGenerator::visit(const ir::operation::Reduce &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -322,8 +286,8 @@
   const auto keep_dims{node.param().keep_dims};
   const auto reduce_type = node.param().reduce_type;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   // Convert to ACL axes taking into account negative values and possible duplicates.
   const auto &axes = _ctx.at(axes_index);
@@ -334,29 +298,21 @@
   std::unique_ptr<arm_compute::IFunction> fn;
   if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
   {
-    auto l = std::make_unique<::arm_compute::CLReduceMean>();
-
     const auto acl_axes =
         acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
-    l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
+                                                              keep_dims, output_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::CLReduceOperation>(
-        _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
     const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
-
-    l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
-                 acl_common::convertReduceType(reduce_type));
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
+        _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+        output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
   }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -364,8 +320,8 @@
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   // NOTE This operation must not be changed the layout from frontend to backend
   //      So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -376,13 +332,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   UNUSED_RELEASE(frontend_layout);
   UNUSED_RELEASE(backend_layout);
 
-  auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Squeeze &node)
@@ -398,32 +351,11 @@
   (void)dims;
   (void)ndim;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-  auto acl_fn = asAclClFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -433,17 +365,14 @@
 
   const auto beta = node.param().beta;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      output_tensor->handle(), beta);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -453,8 +382,8 @@
   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
@@ -506,13 +435,10 @@
     ends_set.set(i, ends[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::CLSlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -523,8 +449,8 @@
   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
@@ -597,14 +523,11 @@
     strides_set.set(i, strides[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
-                strides_set, begin_mask, end_mask, shrink_axis_mask);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+      begin_mask, end_mask, shrink_axis_mask);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -615,8 +538,8 @@
 
   const auto rank = _ctx.at(ifm_idx).shape().rank();
 
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = ifm_tensor->layout();
@@ -625,93 +548,168 @@
   auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
       rank, pv, frontend_layout, backend_layout);
 
-  auto fn = std::make_unique<::arm_compute::CLPermute>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
+  auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
                                                                ofm_tensor->handle(), backend_pv);
 
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Sub &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
-  const auto activation = node.param().activation;
+  const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
+  const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+      node.param().op_type, node.param().alpha, node.param().beta);
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
+  auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), act_info);
 
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Div &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
 {
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
 
-  auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
+          arm_compute::BinaryLogicalOperation::AND);
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    default:
+    {
+      std::string err_msg("acl_cl KernelGenerator : " + node.name() +
+                          "is not elementwise-binary operations");
+      assert(false && err_msg.c_str());
+      break;
+    }
+  }
 
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLExpLayer>();
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
 
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+      fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::CAST:
+    {
+      if (input_tensor->data_type() == output_tensor->data_type())
+      {
+        fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
+                                                            output_tensor->handle());
+        ;
+      }
+      else
+      {
+        // TODO Support converting float to int32 as round down
+        fn = acl_common::generateLayer<arm_compute::CLCast>(
+            input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+      }
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
+                                                                         output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::EXP:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::FLOOR:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
+                                                           output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::NEG:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
+                                                         output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+    {
+      fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::SQRT:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+      fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    default:
+    {
+      throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + "is not supported yet");
+      break;
+    }
+  }
+
+  auto acl_fn = asAclFunction(std::move(fn));
 
   _return_fn = std::move(acl_fn);
 }
@@ -721,16 +719,13 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -740,67 +735,25 @@
   const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
   const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto gamma_tensor = _tensor_builder->at(gamma_index).get();
-  auto beta_tensor = _tensor_builder->at(beta_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+  auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
   auto epsilon = node.param().epsilon;
   auto activation = node.param().activation;
 
-  auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
-                beta_tensor->handle(), epsilon);
+  auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
+      ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+      epsilon);
 
   _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
-  const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                ::arm_compute::BinaryLogicalOperation::AND);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
 }
 
 void KernelGenerator::visit(const ir::operation::LSTM &node)
 {
-  _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
-                                         ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
+  _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
                                         ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
 }
 
 void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -811,18 +764,15 @@
 
   const auto comparison_type = node.param().comparison_type;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLComparison>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                (arm_compute::ComparisonOperation)comparison_type);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+  auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
+      input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+      (arm_compute::ComparisonOperation)comparison_type);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -836,26 +786,24 @@
   for (const auto &input_index : node.getInputs())
     input_indexes.emplace_back(input_index);
 
-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
   std::vector<arm_compute::ICLTensor *> inputs;
   for (const auto &input_index : input_indexes)
-    inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+    inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
 
   if (axis < 0)
     axis += output_rank;
   axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::CLStackLayer>();
-
   // Disable applied dim_correction
   std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes;
   for (const auto &input_index : input_indexes)
   {
     size_t input_rank = _ctx.at(input_index).shape().rank();
-    const auto &input_tensor = _tensor_builder->at(input_index);
+    const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
     orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
     assert(input_rank == input_tensor->num_dimensions());
     if (input_rank != input_tensor->info()->num_dimensions())
@@ -866,7 +814,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
     }
   }
 
-  fn->configure(inputs, axis, output);
+  auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
 
   // Revert disabling applied dim_correction
   assert(inputs.size() == orig_inputs_acl_tensor_shapes.size());
@@ -875,7 +823,21 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
     inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i));
   }
 
-  _return_fn = asAclClFunction(std::move(fn));
+  _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+      node, _ctx, _tensor_reg, _current_op_seq_layout,
+      acl_common::convertPoolType(node.param().op_type));
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  const auto activation = node.param().activation;
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclFunction(std::move(raw_fn)),
+      ActivationBuilder::generate(activation, ofm_tensor->handle()));
 }
 
 void KernelGenerator::visit(const ir::operation::Permute &node)
@@ -883,8 +845,8 @@
   const auto ofm_idx{node.getOutputs().at(0)};
   const auto ifm_idx{node.getInputs().at(0)};
   const auto permute_type = node.getPermuteType();
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
   const auto rank = _ctx.at(ofm_idx).shape().rank();
   assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -895,70 +857,23 @@
     // WHCN -> CWHN
     pv = arm_compute::PermutationVector{2, 0, 1};
 
-    auto l = std::make_unique<::arm_compute::CLPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
   }
   else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
   {
     // CWHN -> WHCN
     pv = arm_compute::PermutationVector{1, 2, 0};
 
-    auto l = std::make_unique<::arm_compute::CLPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
                                                             ofm_tensor->handle(), pv);
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::CLCopy>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
  }
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-  _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -967,58 +882,32 @@
 
   const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  auto fn = std::make_unique<::arm_compute::CLScale>();
+  auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+      ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+      ::arm_compute::SamplingPolicy::TOP_LEFT);
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
-                ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
-                ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+  const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
 
-  auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+  auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(),
+      ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
+      ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
 
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1036,43 +925,25 @@
 
   const auto activation = node.param().activation;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
 
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto weights_tensor = _tensor_builder->at(weights_index).get();
-  auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
-  auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
 
   auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
 
-  auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
-  copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
-  _return_fn = asAclClFunction(std::move(copy_layer));
+  auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
+      hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+  _return_fn = asAclFunction(std::move(copy_layer));
 
-  auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-  fn->configure(input_tensor->handle(), weights_tensor->handle(),
-                recurrent_weights_tensor->handle(), bias_tensor->handle(),
-                hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
-  _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Floor &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLFloor>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+      hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1083,24 +954,19 @@
       node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
   const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
-  auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+  auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
 
   assert(_ctx.at(block_size_index).data());
   assert(_ctx.at(paddings_index).data());
 
-  std::unique_ptr<::arm_compute::IFunction> fn;
-
-  auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
-  l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
-               ofm_tensor->handle());
-  fn = std::move(l);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+      ofm_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1110,29 +976,13 @@
 
   auto block_size = node.param().block_size;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
-  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
+  auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), block_size);
 
-  const auto ofm_index{node.getOutputs().at(0)};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  const auto activation = node.param().activation;
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclClFunction(std::move(raw_fn)),
-      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
@@ -1141,17 +991,14 @@
   const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
   const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
-
-  fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
+      values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
@@ -1173,19 +1020,16 @@
   float beta = 0.5f;  // pow(reduction, -0.5) = 1 / sqrt(reduction)
   float bias = 0.0f;  // Don't offset the reduction.
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
   const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
                                                                radius, alpha, beta, bias, false);
 
-  auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
 
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
@@ -1197,21 +1041,18 @@
   const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
   const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hits_tensor = _tensor_builder->at(hits_index).get();
-
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto keys_tensor = _tensor_builder->at(keys_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
-
-  auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
 
-  fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
-                output_tensor->handle(), hits_tensor->handle());
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
 
-  auto acl_fn = asAclClFunction(std::move(fn));
+  auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
+      lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+      output_tensor->handle(), hits_tensor->handle());
- _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::PReLU &node) @@ -1220,17 +1061,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node) const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto alpha_tensor = _tensor_builder->at(alpha_index).get(); - - auto fn = std::make_unique<::arm_compute::CLPReluLayer>(); - - fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>( + ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::TransposeConv &node) @@ -1258,77 +1096,18 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1); } - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); const auto tconv_info = acl_common::asPadStrideInfo(padding, stride); - auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), - tconv_info, invalid_horizontal, invalid_vertical); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::SQRT &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::CLBitwiseOr>(); - - fn->configure(input0_tensor->handle(), 
input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalNot &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLBitwiseNot>(); + auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(), + ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal, + invalid_vertical); - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::SquaredDifference &node) @@ -1337,17 +1116,14 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>(); + auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::TopKV2 &node) @@ -1364,17 +1140,14 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node) const auto k = node.param().k; - auto values_tensor = _tensor_builder->at(outputValues_index).get(); - auto indices_tensor = _tensor_builder->at(outputIndices_index).get(); - auto input_tensor = _tensor_builder->at(inputData_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get(); + auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get(); - auto fn = std::make_unique<::arm_compute::CLTopKV2>(); + auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>( + input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle()); - fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Gather &node) @@ -1389,9 +1162,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) const auto axis_value = (axis_raw < 0 ? 
(ifm_rank + axis_raw) : axis_raw); const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto indices_tensor = _tensor_builder->at(indices_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get(); // NOTE The frontend layout and backend layout must be the same for this operation. // If not the same, we have to add a stage(?) to perform permutation of output tensor. It @@ -1407,8 +1180,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); - auto fn = std::make_unique<::arm_compute::CLGatherEx>(); - // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; assert(n == ifm_tensor->num_dimensions()); @@ -1433,52 +1204,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node) acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false)); } - fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); + auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>( + ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); // Revert disabling applied dim_correction ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape); indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLNeg>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ArgMax &node) @@ -1491,8 +1224,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) assert((ifm_shape.rank() - 1) == ofm_shape.rank()); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); auto frontend_layout = _current_op_seq_layout; auto 
backend_layout = ifm_tensor->layout(); @@ -1506,31 +1239,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>(); + auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>( + ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), + ::arm_compute::ReductionOperation::ARG_IDX_MAX); - fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), - ::arm_compute::ReductionOperation::ARG_IDX_MAX); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Dequantize &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) @@ -1544,19 +1257,16 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::DepthToSpace &node) @@ -1567,16 +1277,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>( + input_tensor->handle(), output_tensor->handle(), block_size); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Split &node) @@ -1590,10 +1297,10 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &output : node.getOutputs()) output_indexes.emplace_back(output); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto 
ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); std::vector<arm_compute::ICLTensor *> output_tensors; for (const auto &ofm_ind : output_indexes) - output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); + output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle()); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = ifm_tensor->layout(); @@ -1602,11 +1309,10 @@ void KernelGenerator::visit(const ir::operation::Split &node) axis += ifm_rank; axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLSplit>(); - - fn->configure(ifm_tensor->handle(), output_tensors, axis); + auto fn = + acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Unpack &node) @@ -1620,13 +1326,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : node.getOutputs()) output_indexes.emplace_back(output_index); - auto input = _tensor_builder->at(input_index).get()->handle(); + auto input = _tensor_reg->getAclTensor(input_index).get()->handle(); std::vector<arm_compute::ICLTensor *> outputs; for (const auto &output_index : output_indexes) - outputs.emplace_back(_tensor_builder->at(output_index)->handle()); + outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout(); if (axis < 0) axis += input_rank; axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value(); @@ -1636,7 +1342,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) { size_t output_rank = _ctx.at(output_index).shape().rank(); - const auto &output_tensor = _tensor_builder->at(output_index); + const auto &output_tensor = _tensor_reg->getAclTensor(output_index); orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape()); assert(output_rank == output_tensor->num_dimensions()); if (output_rank != output_tensor->info()->num_dimensions()) @@ -1647,11 +1353,9 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) } } - auto fn = std::make_unique<::arm_compute::CLUnstack>(); - - fn->configure(input, outputs, axis); + auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Pad &node) @@ -1669,11 +1373,11 @@ void KernelGenerator::visit(const ir::operation::Pad &node) auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()); const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info); - auto input = _tensor_builder->at(input_index).get()->handle(); - auto output = _tensor_builder->at(output_index).get()->handle(); + auto input = _tensor_reg->getAclTensor(input_index).get()->handle(); + auto output = _tensor_reg->getAclTensor(output_index).get()->handle(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + const auto backend_layout = 
_tensor_reg->getAclTensor(input_index).get()->layout(); ::arm_compute::PaddingList padding_list; padding_list.resize(rank); @@ -1685,11 +1389,10 @@ acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]}; } - auto fn = std::make_unique<::arm_compute::CLPadLayer>(); // Disable applied dim_correction size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_tensor = _tensor_builder->at(input_index); + const auto &input_tensor = _tensor_reg->getAclTensor(input_index); assert(input_rank == input_tensor->num_dimensions()); if (input_rank != input_tensor->info()->num_dimensions()) { @@ -1698,50 +1401,13 @@ _ctx.at(input_index).shape(), frontend_layout, backend_layout, false)); } - fn->configure(input, output, padding_list, pixel_value); + auto fn = + acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value); // Do not revert disabling applied dim_correction; CLPadKernel has a CL kernel for 4 dimensions // It would produce a mismatch of results - _return_fn = asAclClFunction(std::move(fn)); -} - -void KernelGenerator::visit(const ir::operation::Min &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLElementwiseMin>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Max &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLElementwiseMax>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node) @@ -1749,17 +1415,13 @@ const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, - 0); + auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), 
::arm_compute::ConvertPolicy::SATURATE, 0); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node) @@ -1767,17 +1429,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, - 0); + auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } } // namespace acl_cl diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h index 1e3b06489..d188d6d83 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.h +++ b/runtime/onert/backend/acl_cl/KernelGenerator.h @@ -21,6 +21,8 @@ #include "ir/Operands.h" #include "TensorBuilder.h" +#include "AclTensorRegistry.h" +#include "TensorManager.h" namespace onert { @@ -33,70 +35,52 @@ class KernelGenerator : public IKernelGenerator { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, - const std::shared_ptr<TensorBuilder> &tensor_builder); + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg); void visit(const ir::OpSequence &) override; void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::MaxPool2D &) override; - void visit(const ir::operation::AvgPool2D &) override; void visit(const ir::operation::Concat &) override; void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Mul &) override; void visit(const ir::operation::Reduce &) override; void visit(const ir::operation::Reshape &) override; void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Tanh &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::Slice &) override; void visit(const ir::operation::StridedSlice &) override; void visit(const ir::operation::Transpose &) override; - void visit(const ir::operation::Add &) override; - void visit(const ir::operation::Sub &) override; - void visit(const ir::operation::Cast &) override; - void visit(const ir::operation::Div &) override; - void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::ElementwiseActivation &) override; + void visit(const ir::operation::ElementwiseBinary &) override; + void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; void visit(const ir::operation::InstanceNorm &) override; - void visit(const 
ir::operation::Logistic &) override; void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::LogicalAnd &) override; void visit(const ir::operation::LSTM &) override; void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Permute &) override; - void visit(const ir::operation::RSQRT &) override; - void visit(const ir::operation::ReLU &) override; void visit(const ir::operation::ResizeBilinear &) override; - void visit(const ir::operation::ReLU1 &) override; - void visit(const ir::operation::ReLU6 &) override; + void visit(const ir::operation::ResizeNearestNeighbor &) override; void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::Floor &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::L2Pool2D &) override; void visit(const ir::operation::EmbeddingLookup &) override; void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::HashtableLookup &) override; void visit(const ir::operation::PReLU &) override; void visit(const ir::operation::TransposeConv &) override; - void visit(const ir::operation::SQRT &) override; - void visit(const ir::operation::LogicalOr &) override; - void visit(const ir::operation::LogicalNot &) override; void visit(const ir::operation::SquaredDifference &) override; void visit(const ir::operation::TopKV2 &) override; void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::Neg &) override; - void visit(const ir::operation::Abs &) override; void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::Dequantize &) override; void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::Unpack &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Min &) override; - void visit(const ir::operation::Max &) override; void visit(const ir::operation::ConvertFp32ToFp16 &) override; void visit(const ir::operation::ConvertFp16ToFp32 &) override; @@ -104,6 +88,7 @@ private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg; ir::Layout _current_op_seq_layout; }; diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc index 6ba3143e8..9134d3fb8 100644 --- a/runtime/onert/backend/acl_cl/Optimizer.cc +++ b/runtime/onert/backend/acl_cl/Optimizer.cc @@ -19,7 +19,7 @@ #include "ParentInfo.h" #include <cassert> -#include <ir/LoweredGraph.h> +#include <compiler/LoweredGraph.h> #include <util/logging.h> #include "AclSubTensorAnalyzer.h" diff --git a/runtime/onert/backend/acl_cl/TensorManager.h b/runtime/onert/backend/acl_cl/TensorManager.h index bdbd0364e..ab295dbec 100644 --- a/runtime/onert/backend/acl_cl/TensorManager.h +++ b/runtime/onert/backend/acl_cl/TensorManager.h @@ -56,7 +56,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager< using TensorManager = acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>; -TensorManager *createTensorManager(bool is_linear_executor) +inline TensorManager *createTensorManager(bool 
is_linear_executor) { if (is_linear_executor) { diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc new file mode 100644 index 000000000..6ad5b7b69 --- /dev/null +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AclConstantInitializer.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : IConstantInitializer{operands}, _tensor_reg{tensor_reg} +{ + // DO NOTHING +} + +void AclConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index) +{ + assert(node.getInputs().size() > index); + + const auto &input_index = node.getInputs().at(index); + const auto &input_obj = _operands.at(input_index); + registerCopyInitializer(input_index, input_obj); +} + +void AclConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index) +{ + assert(node.getInputs().size() > index); + + const auto &input_index = node.getInputs().at(index); + const auto &input_obj = _operands.at(input_index); + registerPermuteInitializer(input_index, input_obj); +} + +void AclConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); + const auto &block_size_obj = _operands.at(block_size_index); + + if (block_size_obj.isConstant()) + { + _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { + assert(model_obj.data()); + const auto &shape = model_obj.shape(); + const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); + assert(model_obj.shape().rank() == 1); + obj.access([&](ITensor &tensor) { + for (size_t i = 0; i < shape.num_elements(); ++i) + { + const int32_t value = base[shape.num_elements() - i - 1]; + int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + + tensor.calcOffset({static_cast<int32_t>(i)})); + *into = value; + } + }); + }; + } +} + +void AclConstantInitializer::visit(const ir::operation::Conv2D &node) +{ + permuteInputInitialize(node, ir::operation::Conv2D::KERNEL); + copyInputInitialize(node, ir::operation::Conv2D::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) +{ + permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL); + copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::FullyConnected &node) +{ + copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT); + copyInputInitialize(node, ir::operation::FullyConnected::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::LSTM &node) +{ + copyInputInitialize(node, 
ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::RNN &node) +{ + copyInputInitialize(node, ir::operation::RNN::WEIGHTS); + copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS); + copyInputInitialize(node, ir::operation::RNN::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::TransposeConv &node) +{ + permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL); +} + +} // namespace acl_common +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h new file mode 100644 index 000000000..52f4c54cf --- /dev/null +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ +#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ + +#include <backend/IConstantInitializer.h> +#include <ir/Operands.h> +#include "AclTensorRegistry.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +class AclConstantInitializer : public IConstantInitializer +{ +public: + AclConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<ITensorRegistry> &tensor_reg); + +public: + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::TransposeConv &) override; + +protected: + void copyInputInitialize(const ir::Operation &node, uint32_t index); + void permuteInputInitialize(const ir::Operation &node, uint32_t index); + +private: + std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; } + +protected: + std::shared_ptr<ITensorRegistry> _tensor_reg; +}; + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_common/AclFunction.h b/runtime/onert/backend/acl_common/AclFunction.h index 85b18e847..94b65863a 100644 --- a/runtime/onert/backend/acl_common/AclFunction.h +++ b/runtime/onert/backend/acl_common/AclFunction.h @@ -47,12 +47,6 @@ private: std::unique_ptr<::arm_compute::IFunction> _func; }; -class AclClFunction : public AclFunction -{ -public: - using AclFunction::AclFunction; -}; - } // namespace acl_common } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h index 9f7ce3764..372ce689e 100644 --- a/runtime/onert/backend/acl_common/AclKernelGen.h +++ b/runtime/onert/backend/acl_common/AclKernelGen.h @@ -30,11 +30,32 @@ namespace backend namespace acl_common { +template <typename Layer, typename... Args> +std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args) +{ + auto l = std::make_unique<Layer>(); + + l->configure(std::forward<Args>(args)...); + + return l; +} + +template <typename Layer, typename... Args> +std::unique_ptr<arm_compute::IFunction> +generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args) +{ + auto l = std::make_unique<Layer>(memory_manager); + + l->configure(std::forward<Args>(args)...); + + return l; +} + template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer, - typename T_TensorBuilder> -std::unique_ptr<exec::IFunction> -kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder) + typename T_TensorRegistry> +std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node, + const ir::Operands &operands, + const std::shared_ptr<T_TensorRegistry> &tensor_reg) { // TODO Support dynamic rnn // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. 
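[Editor's sketch] The two generateLayer overloads just added to AclKernelGen.h are the core of this refactor: they collapse the repeated make_unique-then-configure idiom into one variadic call that forwards its arguments to the layer's configure(), and the second overload covers ACL layers whose constructor takes an arm_compute::IMemoryManager. A minimal sketch of the before/after pattern, using layers that appear elsewhere in this patch (variable names are illustrative, not part of the diff):

// Old idiom, repeated in every visitor: construct, then configure.
auto floor_old = std::make_unique<::arm_compute::CLFloor>();
floor_old->configure(ifm_tensor->handle(), ofm_tensor->handle());

// New idiom: one call constructs the layer, forwards the arguments to
// configure(), and returns it as std::unique_ptr<arm_compute::IFunction>.
auto floor_fn = acl_common::generateLayer<arm_compute::CLFloor>(ifm_tensor->handle(),
                                                                ofm_tensor->handle());

// Layers that allocate internal buffers use the overload taking the memory
// manager first, which hands it to the constructor before calling configure():
auto rnn_fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
    _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
    weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
    hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);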
@@ -117,43 +138,44 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, const auto projection_clip = projection_threshold; assert(cell_clip >= 0.f && projection_clip >= 0.f); - auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get(); - auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get(); - auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get(); - auto output_tensor = tensor_builder->at(output_index).get(); + auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get(); + auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get(); + auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get(); + auto output_tensor = tensor_reg->getAclTensor(output_index).get(); - auto input_tensor = tensor_builder->at(input_index).get(); + auto input_tensor = tensor_reg->getAclTensor(input_index).get(); - auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get(); - auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get(); - auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get(); + auto input_to_forget_weights_tensor = + tensor_reg->getAclTensor(input_to_forget_weights_index).get(); + auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get(); + auto input_to_output_weights_tensor = + tensor_reg->getAclTensor(input_to_output_weights_index).get(); auto recurrent_to_forget_weights_tensor = - tensor_builder->at(recurrent_to_forget_weights_index).get(); - auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get(); + tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get(); + auto recurrent_to_cell_weights_tensor = + tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get(); auto recurrent_to_output_weights_tensor = - tensor_builder->at(recurrent_to_output_weights_index).get(); + tensor_reg->getAclTensor(recurrent_to_output_weights_index).get(); - auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get(); - auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get(); - auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get(); - auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get(); - auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get(); + auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get(); + auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get(); + auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get(); + auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get(); + auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get(); - auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); - - auto fn = std::make_unique<T_ACLLayer>(); + auto act_info = asActivationLayerInfo(activation); ::arm_compute::LSTMParams<T_Tensor> lstm_params{}; if (has_cifg_param) { auto input_to_input_weights_tensor = - tensor_builder->at(input_to_input_weights_index).get(); // optional + tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional auto recurrent_to_input_weights_tensor = - tensor_builder->at(recurrent_to_input_weights_index).get(); // optional + tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); 
// optional auto cell_to_input_weights_handle = - has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle() + has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle() : nullptr; // optional (non-cifg && peephole) - auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional + auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(), recurrent_to_input_weights_tensor->handle(), cell_to_input_weights_handle, input_gate_bias_tensor->handle()); @@ -161,40 +183,42 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, if (has_peephole_param) { auto cell_to_forget_weights_tensor = - tensor_builder->at(cell_to_forget_weights_index).get(); // optional + tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional auto cell_to_output_weights_tensor = - tensor_builder->at(cell_to_output_weights_index).get(); // optional + tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(), cell_to_output_weights_tensor->handle()); } if (has_projection_param) { - auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional - auto projection_bias_handle = has_projection_bias - ? tensor_builder->at(projection_bias_index).get()->handle() - : nullptr; // optional + auto projection_weights_tensor = + tensor_reg->getAclTensor(projection_weights_index).get(); // optional + auto projection_bias_handle = + has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle() + : nullptr; // optional lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle); } - fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(), - input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), - recurrent_to_forget_weights_tensor->handle(), - recurrent_to_cell_weights_tensor->handle(), - recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), - cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), - output_state_in_tensor->handle(), cell_state_in_tensor->handle(), - scratch_buffer_tensor->handle(), output_state_out_tensor->handle(), - cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, - cell_clip, projection_clip); + auto fn = generateLayer<T_ACLLayer>( + input_tensor->handle(), input_to_forget_weights_tensor->handle(), + input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), + recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(), + recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), + cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), + output_state_in_tensor->handle(), cell_state_in_tensor->handle(), + scratch_buffer_tensor->handle(), output_state_out_tensor->handle(), + cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip, + projection_clip); return std::make_unique<T_FunctionWrapper>(std::move(fn)); } template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer, - typename T_TensorBuilder> + typename T_TensorBuilder, typename T_TensorRegistry> std::unique_ptr<exec::IFunction> kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands 
&operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout) + const std::shared_ptr<T_TensorBuilder> &tensor_builder, + const std::shared_ptr<T_TensorRegistry> &tensor_reg, ir::Layout layout) { using ir::operation::FullyConnected; @@ -236,16 +260,13 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope reshape.dim(1) = input_size; /* W */ } - auto output_tensor = tensor_builder->at(output_index).get(); - const auto input_tensor = tensor_builder->at(input_index).get(); - const auto weight_tensor = tensor_builder->at(weight_index).get(); - const auto bias_tensor = tensor_builder->at(bias_index).get(); + auto output_tensor = tensor_reg->getAclTensor(output_index).get(); + const auto input_tensor = tensor_reg->getAclTensor(input_index).get(); + const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get(); + const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get(); const auto frontend_layout = layout; const auto acl_layout = output_tensor->handle()->info()->data_layout(); - auto fn = - std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL; if (operands.at(weight_index).isConstant()) { @@ -253,20 +274,18 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope assert(operands.at(weight_index).data()); } - fn->configure( - input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(), - output_tensor->handle(), needs_reshape, - ::onert::backend::acl_common::asTensorShape( - reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)), - kernel_type); + auto fn = generateLayer<T_ACLLayer>( + tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape, + asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type); return std::make_unique<T_FunctionWrapper>(std::move(fn)); } -template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder> +template <typename T_ACLLayer, typename T_PoolOp, typename T_AclTensorRegistry> std::unique_ptr<::arm_compute::IFunction> kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout, + const std::shared_ptr<T_AclTensorRegistry> &tensor_reg, ir::Layout layout, ::arm_compute::PoolingType pooling_type) { const auto ofm_index{node.getOutputs().at(0)}; @@ -294,16 +313,14 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl; VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl; - auto ofm_tensor = tensor_builder->at(ofm_index).get(); - auto ifm_tensor = tensor_builder->at(ifm_index).get(); + auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get(); ::arm_compute::PoolingLayerInfo info{ pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(), - acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; - - auto fn = std::make_unique<T_ACLLayer>(); + asPadStrideInfo(padding, stride), true /* exclude_padding */}; - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info); + auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info); return fn; } diff --git 
a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h index 6b03fdf7f..91452014b 100644 --- a/runtime/onert/backend/acl_common/AclTensorBuilder.h +++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h @@ -25,6 +25,7 @@ #include "ir/OperandIndexMap.h" #include <ir/Operands.h> #include "AclTensorManager.h" +#include "AclTensorRegistry.h" #include <memory> #include "ParentInfo.h" #include <util/Utils.h> @@ -48,7 +49,8 @@ class AclTensorBuilder : public ITensorBuilder public: using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>; - AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr); + AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr, + const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg); /** * @brief Register tensor information to allocate on ACL-CL backend @@ -63,19 +65,13 @@ public: void notifyLastUse(const ir::OperandIndex &) override; bool isRegistered(const ir::OperandIndex &) const override; - std::shared_ptr<backend::ITensorRegistry> tensorRegistry() override { return nullptr; } void prepare(void) override; void allocate() override; void postFunctionPrepare() override; - std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override; - void iterate(const IterateFunction &fn) override; - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; - std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind); - T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); } void setUsesCount(const ir::OperandIndex &index, size_t num_uses) @@ -100,8 +96,6 @@ public: */ bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child); - bool supportDynamicTensor() override { return false; } - private: void buildTensors(void); ir::OperandIndex findRootParent(ir::OperandIndex index); @@ -113,6 +107,7 @@ private: ir::OperandIndexMap<size_t> _uses_count_map; std::unique_ptr<T_AclTensorManager> _tensor_mgr; + std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg; // for linear executor std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq; @@ -140,9 +135,10 @@ namespace acl_common { template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands, - T_AclTensorManager *tensor_mgr) - : _operands{operands}, _tensor_mgr{tensor_mgr} +AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder( + const ir::Operands &operands, T_AclTensorManager *tensor_mgr, + const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg) + : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg} { assert(_tensor_mgr); } @@ -310,28 +306,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(voi } template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -std::shared_ptr<ITensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind) -{ - return _tensor_mgr->at(ind); -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn) -{ - _tensor_mgr->iterate(fn); -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -std::shared_ptr<T_ITensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind) -{ - auto ret = 
_tensor_mgr->at(ind); - assert(ret != nullptr); - return ret; -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> std::unique_ptr<ITensorManager> AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void) { diff --git a/runtime/onert/backend/acl_common/AclTensorRegistry.h b/runtime/onert/backend/acl_common/AclTensorRegistry.h new file mode 100644 index 000000000..1ef9f4b35 --- /dev/null +++ b/runtime/onert/backend/acl_common/AclTensorRegistry.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ +#define __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ + +#include "backend/ITensorRegistry.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +/** + * @brief Tensor registry class for acl backends + * + * This is implemented as a wrapper of AclTensorManager. + */ +template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry +{ +public: + AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {} + + std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override + { + return _tensor_mgr->at(ind); + } + + std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override + { + return getITensor(ind); + } + + auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); } + +private: + T_AclTensorManager *_tensor_mgr; +}; + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc index a5bbe1691..67dcc8192 100644 --- a/runtime/onert/backend/acl_common/Convert.cc +++ b/runtime/onert/backend/acl_common/Convert.cc @@ -18,6 +18,7 @@ #include "Swizzle.h" #include "ir/DataType.h" +#include "ir/operation/ElementwiseActivation.h" #include <memory> namespace @@ -177,6 +178,50 @@ namespace acl_common } } +::arm_compute::ActivationLayerInfo +asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha, + float beta) +{ + switch (op_type) + { + case ir::operation::ElementwiseActivation::Type::RELU: + if (beta == 0.f) + { + if (alpha == ir::operation::ElementwiseActivation::infinity) + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + } + else + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha}; + } + } + else + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta}; + } + case ir::operation::ElementwiseActivation::Type::TANH: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta}; + case 
ir::operation::ElementwiseActivation::Type::LOGISTIC:
+      // NOTE The sigmoid function is a special case of the logistic function
+      // f(x) = L / (1 + exp(-k * (x - x0))) when L=1, k=1, x0=0.
+      // TODO In the ACL and NNAPI specs, Logistic currently always uses L=1, k=1, and x0=0
+      // (i.e. always sigmoid) regardless of the parameter values.
+      // If ACL ever supports a non-sigmoid logistic, these parameter values should be fixed.
+      return ::arm_compute::ActivationLayerInfo{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+    case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+      return ::arm_compute::ActivationLayerInfo{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
+    default:
+      throw std::runtime_error{"Not supported yet"};
+      break;
+  }
+}
+
 arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
                                        ir::Layout frontend_layout, ir::Layout backend_layout)
 {
@@ -223,11 +268,6 @@ std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunct
   return std::make_unique<AclFunction>(std::move(layer));
 }
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
-{
-  return std::make_unique<AclClFunction>(std::move(layer));
-}
-
 ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout)
 {
   switch (data_layout)
@@ -265,6 +305,21 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
   }
 }
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir)
+{
+  switch (pool_type_ir)
+  {
+    case ir::operation::Pool2D::PoolType::AVG:
+      return arm_compute::PoolingType::AVG;
+    case ir::operation::Pool2D::PoolType::L2:
+      return arm_compute::PoolingType::L2;
+    case ir::operation::Pool2D::PoolType::MAX:
+      return arm_compute::PoolingType::MAX;
+    default:
+      throw std::runtime_error("convertPoolType: Operation not supported yet");
+  }
+}
+
 arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
 {
   switch (reduce_type_ir)
diff --git a/runtime/onert/backend/acl_common/Convert.h b/runtime/onert/backend/acl_common/Convert.h
index 9362098a5..380321c07 100644
--- a/runtime/onert/backend/acl_common/Convert.h
+++ b/runtime/onert/backend/acl_common/Convert.h
@@ -25,7 +25,9 @@
 #include "ir/Layout.h"
 #include "ir/InternalType.h"
 #include "ir/Operand.h"
+#include "ir/operation/Pool2D.h"
 #include "ir/operation/Reduce.h"
+#include "ir/operation/ElementwiseActivation.h"
 #include "ir/Shape.h"
 #include "ir/TypeInfo.h"
 #include "ir/Coordinates.h"
@@ -59,6 +61,9 @@ namespace acl_common
                                              const ir::Stride &stride);
 ::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code);
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+                      float beta);
 arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
                                        ir::Layout frontend_layout, ir::Layout backend_layout);
@@ -67,7 +72,6 @@ std::set<uint32_t> asSet(const ir::Operand &operand, int32_t rank, ir::Layout fr
                          ir::Layout backend_layout);
 std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
 template <typename T_Function>
 std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction> &&fn)
@@ -78,6 +82,7 @@ std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction>
 ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);
ir::DataType asRuntimeDataType(::arm_compute::DataType data_type); +arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir); arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir); } // namespace acl_common diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h index a0b145e19..35d6e4e8e 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -48,10 +48,13 @@ public: const auto &operands = graph.operands(); const auto &operations = graph.operations(); auto context = std::make_unique<BackendContext>(this, &graph); - auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor)); + auto tm = createTensorManager(is_linear_executor); + auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); + auto tb = std::make_shared<TensorBuilder>(operands, tm, tr); + context->tensor_registry = tr; context->tensor_builder = tb; - context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb); - context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb); + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr); context->tensor_register = nullptr; context->optimizer = std::make_shared<Optimizer>(context.get()); return context; diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc index 4191b277f..79edb9ded 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc @@ -24,100 +24,12 @@ namespace acl_neon { ConstantInitializer::ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<TensorBuilder> &tensor_builder) - : IConstantInitializer{operands}, _tensor_builder{tensor_builder} + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : acl_common::AclConstantInitializer{operands, tensor_reg} { // DO NOTHING } -void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerCopyInitializer(input_index, input_obj); -} - -void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerPermuteInitializer(input_index, input_obj); -} - -void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) -{ - const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); - const auto &block_size_obj = _operands.at(block_size_index); - - if (block_size_obj.isConstant()) - { - _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { - assert(model_obj.data()); - const auto &shape = model_obj.shape(); - const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); - assert(model_obj.shape().rank() == 1); - obj.access([&](ITensor &tensor) { - for (size_t i = 0; i < shape.num_elements(); ++i) - { - const int32_t value = base[shape.num_elements() - i - 1]; - int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + - 
tensor.calcOffset({static_cast<int32_t>(i)})); - *into = value; - } - }); - }; - } -} - -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - permuteInputInitialize(node, ir::operation::Conv2D::KERNEL); - copyInputInitialize(node, ir::operation::Conv2D::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL); - copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT); - copyInputInitialize(node, ir::operation::FullyConnected::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::LSTM &node) -{ - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS); -} - -void ConstantInitializer::visit(const ir::operation::RNN &node) -{ - copyInputInitialize(node, ir::operation::RNN::WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::BIAS); -} - void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) { const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE); @@ -173,11 +85,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) } } -void ConstantInitializer::visit(const ir::operation::TransposeConv &node) -{ - permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL); -} - } // namespace acl_neon } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h index 6b4c1f145..c7d71cdcf 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.h +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h @@ -17,9 +17,7 @@ #ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> -#include "TensorBuilder.h" +#include "AclConstantInitializer.h" namespace onert { @@ -28,29 +26,15 @@ namespace backend namespace acl_neon { -class ConstantInitializer : public IConstantInitializer +class ConstantInitializer : public acl_common::AclConstantInitializer { public: ConstantInitializer(const ir::Operands &operands, - const 
std::shared_ptr<TensorBuilder> &tensor_builder); + const std::shared_ptr<ITensorRegistry> &tensor_reg); public: - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::SpaceToBatchND &) override; - void visit(const ir::operation::TransposeConv &) override; - -private: - std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } - void copyInputInitialize(const ir::Operation &node, uint32_t index); - void permuteInputInitialize(const ir::Operation &node, uint32_t index); - -private: - std::shared_ptr<TensorBuilder> _tensor_builder; + using acl_common::AclConstantInitializer::visit; + void visit(const ir::operation::SpaceToBatchND &node) final; }; } // namespace acl_neon diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index 1195b83cc..6d53c1245 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -44,11 +44,12 @@ using ::onert::backend::acl_common::asAclFunction; using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder< ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>; -KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx, - const ir::Operations &operations_ctx, - const std::shared_ptr<TensorBuilder> &tensor_builder) +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -70,26 +71,6 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) } } -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - - auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - void KernelGenerator::visit(const ir::operation::ArgMax &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -97,8 +78,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); auto frontend_layout = _current_op_seq_layout; auto backend_layout = ifm_tensor->layout(); @@ -111,14 +92,11 @@ void 
KernelGenerator::visit(const ir::operation::ArgMax &node) const auto fixed_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>(); - - fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), - arm_compute::ReductionOperation::ARG_IDX_MAX); + auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>( + ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), + arm_compute::ReductionOperation::ARG_IDX_MAX); - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) @@ -128,50 +106,67 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto block_size_tensor = _tensor_builder->at(block_size_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get(); assert(_ctx.at(block_size_index).data()); - auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>(); - - fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); + auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>( + ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Cast &node) +void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; + const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - if (ifm_tensor->data_type() == ofm_tensor->data_type()) - { - auto l = std::make_unique<::arm_compute::NECopy>(); + const auto activation = node.param().activation; - l->configure(ifm_tensor->handle(), ofm_tensor->handle()); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - fn = std::move(l); - } - else + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().arithmetic_type) { - auto l = std::make_unique<::arm_compute::NECast>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); - - fn = std::move(l); + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + { + fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + { + fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>( + lhs_tensor->handle(), 
rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + { + // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO + fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::DIV: + { + fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); + break; + } + default: + assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations"); + break; } - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = std::make_unique<exec::FunctionSequence>( + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Conv2D &node) @@ -195,20 +190,18 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) ker_width, ker_height); const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); - auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(), - ::arm_compute::Size2D(1U, 1U), act_info); + auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(), + ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info, + ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); _return_fn = asAclFunction(std::move(fn)); } @@ -221,16 +214,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>(); + auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>( + input_tensor->handle(), output_tensor->handle(), block_size); - fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) @@ -255,67 +245,23 @@ void 
KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto multiplier = node.param().multiplier; const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); { - auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, multiplier, act_info); + auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>( + ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), + conv_info, multiplier, act_info); _return_fn = asAclFunction(std::move(fn)); } } -void KernelGenerator::visit(const ir::operation::Dequantize &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::MaxPool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::AvgPool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - void KernelGenerator::visit(const ir::operation::Concat &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -336,80 +282,223 @@ void KernelGenerator::visit(const ir::operation::Concat &node) return; } - auto output_tensor = _tensor_builder->at(ofm_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get(); std::vector<::arm_compute::ITensor *> input_tensors; for (const auto &ifm_ind : input_indexes) - input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); + input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle()); 
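+  // With a single input there is nothing to join, so the branch below degenerates to a
+  // plain element-wise copy (NECopy) instead of NEConcatenateLayer.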
std::unique_ptr<::arm_compute::IFunction> fn;
   if (input_indexes.size() < 2)
   {
-    auto l = std::make_unique<::arm_compute::NECopy>();
-    l->configure(input_tensors.at(0), output_tensor->handle());
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0),
+                                                        output_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
     const auto rank = _ctx.at(ofm_index).shape().rank();
     const auto frontend_layout = _current_op_seq_layout;
     const auto backend_layout = output_tensor->layout();
     const auto fixed_axis =
         acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
-    l->configure(input_tensors, output_tensor->handle(), fixed_axis);
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
+        input_tensors, output_tensor->handle(), fixed_axis);
   }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
 {
-  const auto output_index{node.getOutputs().at(0)};
-  const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
-  const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+
+  const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+      node.param().op_type, node.param().alpha, node.param().beta);
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
+  std::unique_ptr<arm_compute::IFunction> fn;
+  if (node.param().op_type == ir::operation::ElementwiseActivation::Type::LOGISTIC)
+  {
+    // NOTE NEActivationLayer can produce erroneous results; this is caused by
+    // 'vexpq_f32()'.
+    // The NEON function returns 'NaN' instead of 'INF' for a value outside the range
+    // representable in float, and the 'NaN' then corrupts the result of this op.
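+    // e.g. logistic(x) = 1 / (1 + exp(-x)): for a large negative x, exp(-x) overflows
+    // float, so a correct 'INF' would still give 1 / (1 + INF) = 0.0f, while a 'NaN'
+    // from 'vexpq_f32()' propagates and makes the output 'NaN' as well.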
+    fn = acl_common::generateLayer<arm_compute::NEActivationLayerEx>(
+        ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+  }
+  else
+  {
+    fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
+                                                                   ofm_tensor->handle(), act_info);
+  }
-  auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
+  _return_fn = asAclFunction(std::move(fn));
+}
-  fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
-  auto acl_fn = asAclFunction(std::move(fn));
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
-  _return_fn = std::move(acl_fn);
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+    {
+      fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+    {
+      fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
+          lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+      break;
+    }
+    default:
+    {
+      std::string err_msg("acl_neon KernelGenerator : " + node.name() +
+                          " is not an elementwise-binary operation");
+      assert(false && err_msg.c_str());
+      break;
+    }
+  }
+  _return_fn = asAclFunction(std::move(fn));
 }
-void KernelGenerator::visit(const ir::operation::Floor &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
 {
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+
+  std::unique_ptr<arm_compute::IFunction> fn;
+  switch (node.param().op_type)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+      fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::CAST:
+    {
+      if (input_tensor->data_type() == output_tensor->data_type())
+      {
+        fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
+                                                            output_tensor->handle());
+      }
+      else
+      {
+        fn = acl_common::generateLayer<arm_compute::NECast>(
+            input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+      }
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(),
+                                                                         output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::EXP:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::FLOOR:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(),
+                                                           output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+    {
+      fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::NEG:
+    {
+      fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(),
+                                                              output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+    {
+      fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(),
+                                                                output_tensor->handle());
+      break;
+    }
+    case ir::operation::ElementwiseUnary::Type::SQRT:
+    {
+      const ::arm_compute::ActivationLayerInfo act_info{
+          ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-  auto fn = std::make_unique<::arm_compute::NEFloor>();
+      fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+          input_tensor->handle(), output_tensor->handle(), act_info);
+      break;
+    }
+    default:
+    {
+      throw std::runtime_error("acl_neon KernelGenerator : " + node.name() +
+                               " is not supported yet");
+      break;
+    }
+  }
+  _return_fn = asAclFunction(std::move(fn));
+}
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
+void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+  const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
-  auto acl_fn = asAclFunction(std::move(fn));
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
-  _return_fn = std::move(acl_fn);
+  auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
+      values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+
+  _return_fn = asAclFunction(std::move(fn));
 }
 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
 {
   const auto output_index{node.getOutputs().at(0)};
-  auto output_tensor = _tensor_builder->at(output_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
   const auto activation = node.param().activation;
   auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
                                                 ::arm_compute::NEFullyConnectedReshapingLayer>(
-      node, _ctx, _tensor_builder, _current_op_seq_layout);
+      node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
   _return_fn = std::make_unique<exec::FunctionSequence>(
       std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
 }
@@ -423,21 +512,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
   const auto
keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
   const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hits_tensor = _tensor_builder->at(hits_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
-  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
-  auto keys_tensor = _tensor_builder->at(keys_index).get();
-  auto values_tensor = _tensor_builder->at(values_index).get();
+  auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+  auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+  auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
-  auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
+  auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
+      lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+      output_tensor->handle(), hits_tensor->handle());
-  fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
-                output_tensor->handle(), hits_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 void KernelGenerator::visit(const ir::operation::Gather &node)
@@ -453,9 +539,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   // Converting in reverse order
   const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto indices_tensor = _tensor_builder->at(indices_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
   const auto backend_layout = ofm_tensor->layout();
   UNUSED_RELEASE(backend_layout);
@@ -471,8 +557,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
   assert(backend_layout == indices_tensor->layout());
   assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
-  auto fn = std::make_unique<::arm_compute::NEGatherEx>();
-
   // input is n-D, indices k-D, output is (n + k - 1)-D
   size_t n = ifm_rank;
   assert(n == ifm_tensor->num_dimensions());
@@ -495,15 +579,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
         acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
   }
-  fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+  auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
+      ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
   // acl_neon does not revert the disabled dim_correction because acl_neon's kernels
   // use arm_compute::TensorInfo::offset_element_in_bytes(),
   // which would cause an error when a kernel accesses a higher dimension whose size is 1
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -513,17 +596,16 @@
   const auto ofm_index{node.getOutputs().at(0)};
   const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
   const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
   const auto
beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto gamma_tensor = _tensor_builder->at(gamma_index).get(); - auto beta_tensor = _tensor_builder->at(beta_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get(); + auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get(); auto epsilon = node.param().epsilon; auto activation = node.param().activation; - auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), - beta_tensor->handle(), epsilon); + auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>( + ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(), + epsilon); _return_fn = std::make_unique<exec::FunctionSequence>( asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); @@ -548,32 +630,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) float bias = 0.0f; // Don't offset the reduction. - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, radius, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = asAclFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::L2Pool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) @@ -587,142 +653,22 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = 
asAclFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalAnd &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::NELogicalAnd>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalNot &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::NEBitwiseNot>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::NELogicalOr>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Logistic &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; - - // NOTE NEActivationLayer can generate produce erroneous results. it were caused by 'vexpq_f32()'. - // The neon function returns a value outside of the limit of representation in float as 'NaN' - // instead of 'INF', and then the result of this op will be errors due to the 'NaN'. 
- auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LSTM &node) { _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor, - ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder); -} - -void KernelGenerator::visit(const ir::operation::Mul &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>(); - - // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale - arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); - - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::NENegLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg); } void KernelGenerator::visit(const ir::operation::Pack &node) @@ -736,25 +682,23 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : node.getInputs()) input_indexes.emplace_back(input_index); - auto output = _tensor_builder->at(output_index).get()->handle(); + auto output = _tensor_reg->getAclTensor(output_index).get()->handle(); std::vector<arm_compute::ITensor *> inputs; for (const auto &input_index : input_indexes) - inputs.emplace_back(_tensor_builder->at(input_index)->handle()); + inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(output_index).get()->layout(); + const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout(); if (axis < 0) axis += output_rank; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::NEStackLayer>(); - // Disable applied dim_correction for (const auto &input_index : input_indexes) { size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_tensor = _tensor_builder->at(input_index); + const auto &input_tensor = _tensor_reg->getAclTensor(input_index); assert(input_rank == input_tensor->num_dimensions()); if (input_rank != input_tensor->info()->num_dimensions()) { @@ -764,7 +708,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) } } - 
fn->configure(inputs, axis, output);
+  auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
   // acl_neon does not revert the disabled dim_correction because acl_neon's kernels
   // use arm_compute::TensorInfo::offset_element_in_bytes()
@@ -783,8 +727,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
   auto rank = _ctx.at(input_index).shape().rank();
   auto pad_base = _ctx.at(pad_index).data()->base();
-  auto input = _tensor_builder->at(input_index).get()->handle();
-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
   ::arm_compute::PaddingList padding_list;
   padding_list.resize(rank);
@@ -793,7 +737,7 @@
     const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
     const auto frontend_layout = _current_op_seq_layout;
-    const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+    const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
     const auto axis =
         acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
     padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
@@ -807,19 +751,33 @@
   const auto pixel_value =
       ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
-  auto fn = std::make_unique<::arm_compute::NEPadLayer>();
-  fn->configure(input, output, padding_list, pixel_value);
+  auto fn =
+      acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
   _return_fn = asAclFunction(std::move(fn));
 }
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+      node, _ctx, _tensor_reg, _current_op_seq_layout,
+      acl_common::convertPoolType(node.param().op_type));
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  const auto activation = node.param().activation;
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclFunction(std::move(raw_fn)),
+      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+}
+
 void KernelGenerator::visit(const ir::operation::Permute &node)
 {
   const auto ofm_idx{node.getOutputs().at(0)};
   const auto ifm_idx{node.getInputs().at(0)};
   const auto permute_type = node.getPermuteType();
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
   const auto rank = _ctx.at(ofm_idx).shape().rank();
   assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -830,35 +788,22 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
     // WHCN -> CWHN
     pv = arm_compute::PermutationVector{2, 0, 1};
-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
   }
   else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
   {
     // CWHN -> WHCN
     pv = arm_compute::PermutationVector{1, 2, 0};
-    auto l =
std::make_unique<::arm_compute::NEPermute>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(), + ofm_tensor->handle(), pv); } else { - auto l = std::make_unique<::arm_compute::NECopy>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle()); } - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::PReLU &node) @@ -867,21 +812,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node) const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto alpha_tensor = _tensor_builder->at(alpha_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get(); - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = std::make_unique<::arm_compute::NEPReluLayer>(); - - l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); - - fn = std::move(l); + auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>( + ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Reduce &node) @@ -890,8 +828,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)}; const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); // Convert to ACL axes taking into account negative values and possible duplicates. 
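  // e.g. for a rank-4 input, frontend axes {-1, 3} both normalize to axis 3 and then map to
  // a single ACL axis (axis 0 when the frontend and backend layouts match, since ACL stores
  // dimensions in reverse order)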
const auto &axes = _ctx.at(axes_index); @@ -906,93 +844,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) std::unique_ptr<::arm_compute::IFunction> fn; if (reduce_type == ir::operation::Reduce::ReduceType::MEAN) { - auto l = std::make_unique<::arm_compute::NEReduceMean>(); - - l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle()); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NEReduceMean>(input_tensor->handle(), reduce_axes, + keep_dims, output_tensor->handle()); } else if (reduce_type == ir::operation::Reduce::ReduceType::SUM) { - auto l = std::make_unique<::arm_compute::NEReduceSum>(); - - l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle()); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NEReduceSum>(input_tensor->handle(), reduce_axes, + keep_dims, output_tensor->handle()); } else { - auto l = std::make_unique<::arm_compute::NEReduceOperation>(); - - l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(), - acl_common::convertReduceType(reduce_type)); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NEReduceOperation>( + input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(), + acl_common::convertReduceType(reduce_type)); } - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<arm_compute::NEActivationLayer>(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU1 &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; - - auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU6 &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; - - auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const 
 void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -1000,8 +866,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   // NOTE This operation must not change the layout from frontend to backend.
   //      So, PermutationOperationPass makes the layouts of frontend and backend the same.
@@ -1012,13 +878,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   UNUSED_RELEASE(frontend_layout);
   UNUSED_RELEASE(backend_layout);
 
-  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -1027,18 +890,15 @@
   const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEScale>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
-                ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
-                ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+  auto fn = acl_common::generateLayer<arm_compute::NEScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+      ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+      ::arm_compute::SamplingPolicy::TOP_LEFT);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1056,40 +916,24 @@
   const auto activation = node.param().activation;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
 
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto weights_tensor = _tensor_builder->at(weights_index).get();
-  auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
-  auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
 
   auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
 
-  auto copy_layer = std::make_unique<::arm_compute::NECopy>();
-  copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+  auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
+      hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
   _return_fn = asAclFunction(std::move(copy_layer));
 
-  auto fn = std::make_unique<::arm_compute::NERNNLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-  fn->configure(input_tensor->handle(), weights_tensor->handle(),
-                recurrent_weights_tensor->handle(), bias_tensor->handle(),
-                hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
+  auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+      hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
   _return_fn = asAclFunction(std::move(fn));
 }
 
@@ -1105,32 +949,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
   (void)dims;
   (void)ndim;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-  auto acl_fn = asAclFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
+  _return_fn = asAclFunction(std::move(fn));
 }
 
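The RNN hunk above passes the internal buffer manager as an extra first argument to generateLayer<arm_compute::NERNNLayer>. Layers such as NERNNLayer and NESoftmaxLayer accept a memory manager in their constructor rather than in configure(), so the helper presumably provides an overload along these lines (a hedged sketch, not the verbatim acl_common code):

template <typename Layer, typename... Args>
std::unique_ptr<arm_compute::IFunction>
generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args)
{
  // The memory manager feeds the constructor; everything else goes to configure().
  auto l = std::make_unique<Layer>(memory_manager);
  l->configure(std::forward<Args>(args)...);
  return l;
}
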
 void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -1139,8 +962,8 @@
   const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
   const auto beta = node.param().beta;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = input_tensor->layout();
@@ -1154,14 +977,11 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
         acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
   }
 
-  auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+  auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      output_tensor->handle(), beta);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1172,22 +992,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
       node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
   const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
-  auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+  auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
 
   assert(_ctx.at(block_size_index).data());
   assert(_ctx.at(paddings_index).data());
 
-  auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
-
-  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
-                ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+      ofm_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1197,16 +1014,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
 
   auto block_size = node.param().block_size;
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
 
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), block_size);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1221,10 +1035,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
   for (const auto &output : node.getOutputs())
     output_indexes.emplace_back(output);
 
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
   std::vector<arm_compute::ITensor *> output_tensors;
   for (const auto &ofm_ind : output_indexes)
-    output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+    output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = ifm_tensor->layout();
@@ -1233,71 +1047,26 @@ void KernelGenerator::visit(const ir::operation::Split &node)
     axis += ifm_rank;
   axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::NESplit>();
-
-  fn->configure(ifm_tensor->handle(), output_tensors, axis);
+  auto fn =
+      acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
 
   _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
   const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
   const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
+      lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
 }
 
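The removed Sub visitor above shows the pattern used for operations with a fused activation: the arithmetic kernel and the activation run as one exec::FunctionSequence. The consolidated BinaryArithmetic visitor that supersedes Add/Sub/Div (declared in the header diff further down) can reuse the same chaining, copied here from the removed lines:

_return_fn = std::make_unique<exec::FunctionSequence>(
    asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
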
 void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -1307,8 +1076,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
   const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
   const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
@@ -1358,13 +1127,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
     ends_set.set(i, ends[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::NESlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+  auto fn = acl_common::generateLayer<arm_compute::NESlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -1375,8 +1141,8 @@
   const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
   const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
 
-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = inputData_tensor->layout();
@@ -1445,14 +1211,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
     strides_set.set(i, strides[i]);
   }
 
-  auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
+  auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+      begin_mask, end_mask, shrink_axis_mask);
 
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
-                strides_set, begin_mask, end_mask, shrink_axis_mask);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1481,20 +1244,17 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
     invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
   }
 
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
 
   const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
 
-  auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
+      ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+      invalid_horizontal, invalid_vertical);
 
-  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
-                tconv_info, invalid_horizontal, invalid_vertical);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -1503,8 +1263,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
   const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
   const auto &perm{node.param().perm};
 
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
 
   const auto frontend_layout = _current_op_seq_layout;
   const auto backend_layout = ifm_tensor->layout();
@@ -1514,27 +1274,17 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
       rank, pv, frontend_layout, backend_layout);
 
   std::unique_ptr<::arm_compute::IFunction> fn;
-
   if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
   {
-    auto l = std::make_unique<::arm_compute::NETranspose>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
+                                                             ofm_tensor->handle());
   }
   else
   {
-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), backend_pv);
   }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1548,25 +1298,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
   for (const auto &output_index : node.getOutputs())
     output_indexes.emplace_back(output_index);
 
-  auto input = _tensor_builder->at(input_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
   std::vector<arm_compute::ITensor *> outputs;
   for (const auto &output_index : output_indexes)
-    outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+    outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
 
   const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
   if (axis < 0)
     axis += input_rank;
   axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
 
-  auto fn = std::make_unique<::arm_compute::NEUnstack>();
-
   // Disable applied dim_correction
   std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
   for (const auto &output_index : output_indexes)
   {
     size_t output_rank = _ctx.at(output_index).shape().rank();
-    const auto &output_tensor = _tensor_builder->at(output_index);
+    const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
     orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
     assert(output_rank == output_tensor->num_dimensions());
     if (output_rank != output_tensor->info()->num_dimensions())
@@ -1577,84 +1325,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
     }
   }
 
-  fn->configure(input, outputs, axis);
+  auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);
 
   _return_fn = asAclFunction(std::move(fn));
 }
 
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEExpLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
 {
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
 
-  auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
 
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -1665,56 +1352,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
 
   const auto comparison_type = node.param().comparison_type;
 
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                (arm_compute::ComparisonOperation)comparison_type);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+  auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
 
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
+      input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+      (arm_compute::ComparisonOperation)comparison_type);
 
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 void KernelGenerator::visit(const ir::operation::OneHot &node)
@@ -1726,17 +1372,16 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
   const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
   const auto axis = node.param().axis;
 
-  auto output_tensor = _tensor_builder->at(out_idx).get();
-  auto indices_tensor = _tensor_builder->at(indices_idx).get();
-  auto depth_tensor = _tensor_builder->at(depth_idx).get();
-  auto onvalue_tensor = _tensor_builder->at(onvalue_idx).get();
-  auto offvalue_tensor = _tensor_builder->at(offvalue_idx).get();
-
-  auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
-  fn->configure(indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
-                offvalue_tensor->handle(), output_tensor->handle(), axis);
-  auto acl_fn = asAclFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
+  auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
+  auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
+  auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
+  auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+
+  auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
+      indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+      offvalue_tensor->handle(), output_tensor->handle(), axis);
+  _return_fn = asAclFunction(std::move(fn));
 }
 
 } // namespace acl_neon
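The header below now threads a std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> into the kernel generator, and the .cc hunks above resolve tensors through _tensor_reg->getAclTensor(...) instead of _tensor_builder->at(...). A plausible minimal shape for that wrapper, assuming it merely adapts the ACL tensor manager to the backend-agnostic ITensorRegistry lookup interface (the real acl_common header may differ):

template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry
{
public:
  AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}

  // Generic lookups used by backend-independent code.
  std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
  {
    return _tensor_mgr->at(ind);
  }
  std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
  {
    return getITensor(ind);
  }

  // Backend-typed lookup used by the ACL kernel generators.
  auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }

private:
  T_AclTensorManager *_tensor_mgr;
};
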
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
index d6f7932b7..4d269cde5 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.h
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -21,6 +21,8 @@
 #include "ir/Operands.h"
 #include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
 
 namespace onert
 {
@@ -33,75 +35,57 @@ class KernelGenerator : public IKernelGenerator
 {
 public:
   KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
-                  const std::shared_ptr<TensorBuilder> &tensor_builder);
+                  const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
 
   void visit(const ir::OpSequence &) override;
-  void visit(const ir::operation::Abs &) override;
   void visit(const ir::operation::ArgMax &) override;
   void visit(const ir::operation::BatchToSpaceND &) override;
-  void visit(const ir::operation::Cast &) override;
+  void visit(const ir::operation::BinaryArithmetic &) override;
   void visit(const ir::operation::Conv2D &) override;
   void visit(const ir::operation::DepthToSpace &) override;
   void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::Dequantize &) override;
-  void visit(const ir::operation::MaxPool2D &) override;
-  void visit(const ir::operation::AvgPool2D &) override;
   void visit(const ir::operation::Concat &) override;
+  void visit(const ir::operation::ElementwiseActivation &) override;
+  void visit(const ir::operation::ElementwiseBinary &) override;
+  void visit(const ir::operation::ElementwiseUnary &) override;
   void visit(const ir::operation::EmbeddingLookup &) override;
-  void visit(const ir::operation::Floor &) override;
   void visit(const ir::operation::FullyConnected &) override;
   void visit(const ir::operation::Gather &) override;
   void visit(const ir::operation::HashtableLookup &) override;
   void visit(const ir::operation::InstanceNorm &) override;
   void visit(const ir::operation::L2Normalization &) override;
-  void visit(const ir::operation::L2Pool2D &) override;
   void visit(const ir::operation::LocalResponseNormalization &) override;
-  void visit(const ir::operation::LogicalAnd &) override;
-  void visit(const ir::operation::LogicalNot &) override;
-  void visit(const ir::operation::LogicalOr &) override;
-  void visit(const ir::operation::Logistic &) override;
   void visit(const ir::operation::LSTM &) override;
-  void visit(const ir::operation::Mul &) override;
-  void visit(const ir::operation::Neg &) override;
   void visit(const ir::operation::Pack &) override;
   void visit(const ir::operation::Pad &) override;
+  void visit(const ir::operation::Pool2D &) override;
   void visit(const ir::operation::Permute &) override;
   void visit(const ir::operation::PReLU &) override;
   void visit(const ir::operation::Reduce &) override;
-  void visit(const ir::operation::ReLU &) override;
-  void visit(const ir::operation::ReLU1 &) override;
-  void visit(const ir::operation::ReLU6 &) override;
   void visit(const ir::operation::Reshape &) override;
   void visit(const ir::operation::ResizeBilinear &) override;
   void visit(const ir::operation::RNN &) override;
-  void visit(const ir::operation::RSQRT &) override;
   void visit(const ir::operation::Squeeze &) override;
-  void visit(const ir::operation::Tanh &) override;
   void visit(const ir::operation::Softmax &) override;
   void visit(const ir::operation::SpaceToBatchND &) override;
   void visit(const ir::operation::SpaceToDepth &) override;
   void visit(const ir::operation::Split &) override;
-  void visit(const ir::operation::SQRT &) override;
   void visit(const ir::operation::SquaredDifference &) override;
-  void visit(const ir::operation::Sub &) override;
   void visit(const ir::operation::Slice &) override;
   void visit(const ir::operation::StridedSlice &) override;
   void visit(const ir::operation::TransposeConv &) override;
   void visit(const ir::operation::Transpose &) override;
   void visit(const ir::operation::Unpack &) override;
-  void visit(const ir::operation::Add &) override;
-  void visit(const ir::operation::Div &) override;
-  void visit(const ir::operation::Exp &) override;
   void visit(const ir::operation::ExpandDims &) override;
   void visit(const ir::operation::Comparison &) override;
-  void visit(const ir::operation::Min &) override;
-  void visit(const ir::operation::Max &) override;
   void visit(const ir::operation::OneHot &) override;
 
 private:
   const ir::Operands &_ctx;
   const ir::Operations &_operations_ctx;
   std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
   ir::Layout _current_op_seq_layout;
 };
 
diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc
index 2948cab09..ac80901cc 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.cc
+++ b/runtime/onert/backend/acl_neon/Optimizer.cc
@@ -19,7 +19,7 @@
 #include "ParentInfo.h"
 
 #include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
 #include <util/logging.h>
 #include "AclSubTensorAnalyzer.h"
 
diff --git a/runtime/onert/backend/acl_neon/TensorManager.h b/runtime/onert/backend/acl_neon/TensorManager.h
index 3ec9efa8f..3b7cfbcfd 100644
--- a/runtime/onert/backend/acl_neon/TensorManager.h
+++ b/runtime/onert/backend/acl_neon/TensorManager.h
@@ -55,7 +55,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
 using TensorManager =
     acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
                                  operand::NESubTensor>;
 
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
 {
   if (is_linear_executor)
   {
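The TensorManager.h change just above is a correctness fix rather than a refactor: createTensorManager is defined in a header, so every translation unit that includes it emits a definition, and without inline the link step fails with a duplicate symbol. A self-contained illustration with hypothetical file names (not from this patch):

// util.h
#pragma once
inline int make_id() { return 42; } // 'inline' marks the definition as mergeable across TUs

// a.cc and b.cc may both '#include "util.h"'. Without 'inline' on make_id(),
// linking a.o and b.o would fail with "multiple definition of `make_id()'".
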
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index 56bd352e0..fc8574b26 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -47,10 +47,12 @@ public:
     const auto &operands = graph.operands();
     const auto &operations = graph.operations();
     auto context = std::make_unique<BackendContext>(this, &graph);
-    auto tb = std::make_shared<TensorBuilder>();
+    auto tr = std::make_shared<cpu_common::TensorRegistry>();
+    auto tb = std::make_shared<TensorBuilder>(tr);
+    context->tensor_registry = tr;
     context->tensor_builder = tb;
-    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
-    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
                                                             context->external_context());
     context->tensor_register = nullptr;
     context->optimizer = nullptr;
 
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index f314a8e39..e90b21054 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -31,13 +31,15 @@ class BackendContext : public onert::backend::BackendContext
 {
 public:
   BackendContext(const Backend *backend, const ir::Graph *graph,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
                  std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
                  std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
                  std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
                  std::shared_ptr<ITensorRegister> tensor_register = nullptr,
                  std::shared_ptr<IOptimizer> optimizer = nullptr)
-      : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
-                                       kernel_gen, tensor_register, optimizer),
+      : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
+                                       constant_initializer, kernel_gen, tensor_register,
+                                       optimizer),
         _external_context(new ExternalContext)
   {
   }
 
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
index deb27f0fe..6f6eb77bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ b/runtime/onert/backend/cpu/ConstantInitializer.cc
@@ -25,8 +25,8 @@ namespace cpu
 {
 
 ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
-                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
-    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+                                         const std::shared_ptr<ITensorRegistry> &tensor_reg)
+    : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
 {
   // DO NOTHING
 }
 
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index de03a693a..c016c83bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
 
-#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
 
 #include <backend/IConstantInitializer.h>
 #include <ir/Operands.h>
@@ -33,7 +33,7 @@ class ConstantInitializer : public IConstantInitializer
 {
 public:
   ConstantInitializer(const ir::Operands &operands,
-                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);
 
 public:
   void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
@@ -50,10 +50,10 @@ public:
   void visit(const ir::operation::FullyConnected &) override;
 
 private:
-  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+  std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
 
 private:
-  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<ITensorRegistry> _tensor_reg;
 };
 
 } // namespace cpu
 
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 7939fe894..74b6f0c6b 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -16,49 +16,36 @@
 #include "KernelGenerator.h"
 
-#include "ops/AbsLayer.h"
-#include "ops/AddLayer.h"
 #include "ops/ArgMinMaxLayer.h"
-#include "ops/AvgPoolLayer.h"
 #include "ops/BatchToSpaceNDLayer.h"
-#include "ops/CastLayer.h"
+#include "ops/BinaryArithmeticLayer.h"
 #include "ops/CompareLayer.h"
 #include "ops/ConcatLayer.h"
 #include "ops/ConvolutionLayer.h"
-#include "ops/CosLayer.h"
 #include "ops/DepthwiseConvolutionLayer.h"
-#include "ops/DivLayer.h"
 #include "ops/EinsumLayer.h"
"ops/EinsumLayer.h" -#include "ops/ExpLayer.h" +#include "ops/ElementwiseActivationLayer.h" +#include "ops/ElementwiseBinaryLayer.h" +#include "ops/ElementwiseUnaryLayer.h" #include "ops/ExpandDimsLayer.h" #include "ops/FillLayer.h" #include "ops/FullyConnectedLayer.h" #include "ops/GatherLayer.h" -#include "ops/LogLayer.h" -#include "ops/LogisticLayer.h" -#include "ops/MaxLayer.h" -#include "ops/MaxPoolLayer.h" #include "ops/MeanLayer.h" -#include "ops/MinLayer.h" -#include "ops/MulLayer.h" -#include "ops/NegLayer.h" #include "ops/OneHotLayer.h" #include "ops/OperationUtils.h" #include "ops/PackLayer.h" #include "ops/PadLayer.h" +#include "ops/PoolLayer.h" #include "ops/PowLayer.h" #include "ops/RangeLayer.h" +#include "ops/RankLayer.h" #include "ops/ReduceLayer.h" -#include "ops/ReLULayer.h" -#include "ops/ReLU6Layer.h" #include "ops/ReshapeLayer.h" #include "ops/ResizeBilinearLayer.h" #include "ops/ReverseLayer.h" -#include "ops/RoundLayer.h" -#include "ops/RsqrtLayer.h" #include "ops/SelectLayer.h" #include "ops/ShapeLayer.h" -#include "ops/SinLayer.h" #include "ops/SliceLayer.h" #include "ops/SoftMaxLayer.h" #include "ops/StridedSliceLayer.h" @@ -66,22 +53,16 @@ #include "ops/SpaceToDepthLayer.h" #include "ops/SplitLayer.h" #include "ops/SplitVLayer.h" -#include "ops/SubLayer.h" -#include "ops/TanhLayer.h" #include "ops/TileLayer.h" #include "ops/TransposeLayer.h" #include "ops/UnpackLayer.h" -#include "ops/LogicalNotLayer.h" -#include "ops/ZerosLikeLayer.h" #include "ops/SquaredDiffLayer.h" -#include "ops/LogicalOrLayer.h" #include "ops/L2NormLayer.h" #include "ops/MatrixBandPartLayer.h" #include "ops/BatchMatMulLayer.h" #include "ops/BroadcastToLayer.h" #include "ops/FusedBatchNormLayer.h" #include "ops/LogSoftMaxLayer.h" -#include "ops/QuantizeLayer.h" #include "ops/StatelessRandomUniformLayer.h" #include <backend/Backend.h> @@ -102,6 +83,104 @@ namespace cpu namespace { +ops::ArithmeticType +convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir) +{ + switch (arithmetic_type_ir) + { + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + return ops::ArithmeticType::kAdd; + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + return ops::ArithmeticType::kSub; + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + return ops::ArithmeticType::kMul; + case ir::operation::BinaryArithmetic::ArithmeticType::DIV: + return ops::ArithmeticType::kDiv; + default: + throw std::runtime_error("cpu KernelGenerator : Not supported operation yet"); + } +} + +ops::ElementwiseActivationType +convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir) +{ + switch (type_ir) + { + case ir::operation::ElementwiseActivation::Type::LOGISTIC: + return ops::ElementwiseActivationType::kLogistic; + case ir::operation::ElementwiseActivation::Type::RELU: + return ops::ElementwiseActivationType::kReLU; + case ir::operation::ElementwiseActivation::Type::TANH: + return ops::ElementwiseActivationType::kTanh; + default: + throw std::runtime_error("cpu KernelGenerator : Not supported operation yet"); + } +} + +ops::ElementwiseBinaryType +convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir) +{ + switch (type_ir) + { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: + return ops::ElementwiseBinaryType::kLogicalOr; + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: + return ops::ElementwiseBinaryType::kMax; + case 
+      return ops::ElementwiseBinaryType::kMin;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+      return ops::ElementwiseUnaryType::kAbs;
+    case ir::operation::ElementwiseUnary::Type::CAST:
+      return ops::ElementwiseUnaryType::kCast;
+    case ir::operation::ElementwiseUnary::Type::COS:
+      return ops::ElementwiseUnaryType::kCos;
+    case ir::operation::ElementwiseUnary::Type::ERF:
+      return ops::ElementwiseUnaryType::kErf;
+    case ir::operation::ElementwiseUnary::Type::EXP:
+      return ops::ElementwiseUnaryType::kExp;
+    case ir::operation::ElementwiseUnary::Type::LOG:
+      return ops::ElementwiseUnaryType::kLog;
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+      return ops::ElementwiseUnaryType::kLogicalNot;
+    case ir::operation::ElementwiseUnary::Type::NEG:
+      return ops::ElementwiseUnaryType::kNeg;
+    case ir::operation::ElementwiseUnary::Type::QUANTIZE:
+      return ops::ElementwiseUnaryType::kQuantize;
+    case ir::operation::ElementwiseUnary::Type::ROUND:
+      return ops::ElementwiseUnaryType::kRound;
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+      return ops::ElementwiseUnaryType::kRSqrt;
+    case ir::operation::ElementwiseUnary::Type::SIN:
+      return ops::ElementwiseUnaryType::kSin;
+    case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
+      return ops::ElementwiseUnaryType::kZerosLike;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::Pool2D::PoolType::AVG:
+      return ops::PoolType::kAvg;
+    case ir::operation::Pool2D::PoolType::MAX:
+      return ops::PoolType::kMax;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
 ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
 {
   switch (reduce_type_ir)
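These convert* helpers translate the IR-level enums into the cpu ops enums once, at kernel-generation time, so a single fused layer class can serve several former single-purpose operations; each switch throws on values it cannot map yet. A hedged usage sketch in the style of the visitors below (the actual ElementwiseUnary visitor is not part of the excerpted hunks):

// Inside a hypothetical visit(ir::operation::ElementwiseUnary) body:
auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
fn->configure(input_tensor, output_tensor,
              convertElementwiseUnaryType(node.param().op_type));
_return_fn = std::move(fn);
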
@@ -127,11 +206,12 @@ KernelGenerator::KernelGenerator(
     const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
     const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
     const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
     const std::shared_ptr<ExternalContext> &external_context)
     : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
-      _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
-      _external_context(external_context)
+      _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+      _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
 {
   // DO NOTHING
 }
@@ -140,11 +220,9 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
 {
   assert(!_return_fn_seq);
   assert(_tensor_builder->dynamicTensorManager());
-  assert(_tensor_builder->tensorRegistry());
+  assert(_tensor_reg);
 
-  auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
-  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(
-      _ctx, dyn_tensor_manager, _tensor_builder->tensorRegistry());
+  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
 
   _return_fn_seq = std::make_unique<exec::FunctionSequence>();
 
@@ -154,7 +232,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
     dyn_ctx->op_seq = &op_seq;
     dyn_ctx->operations = &_operations_ctx;
     dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-    dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
+    dyn_ctx->tensor_registry = _tensor_reg;
     dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
 
     _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
@@ -170,13 +248,13 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
 
     for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
     {
-      auto portable_tensor = _tensor_builder->portableAt(ind);
+      auto portable_tensor = _tensor_reg->getPortableTensor(ind);
       if (portable_tensor)
      {
         assert(portable_tensor->layout() == ir::Layout::NHWC);
       }
-      auto tensor = _tensor_builder->at(ind);
+      auto tensor = _tensor_reg->getNativeTensor(ind);
       if (tensor)
       {
         tensor->increase_ref();
@@ -194,21 +272,23 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
   const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-  auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
-  auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
 
   const auto stride = node.param().stride;
   const auto activation = node.param().activation;
   const auto param_padding = node.param().padding;
+  const auto dilation = node.param().dilation;
   auto fn = std::make_unique<ops::ConvolutionLayer>();
 
   if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
   {
     fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type,
                   param_padding.param.left, param_padding.param.right, param_padding.param.top,
                   param_padding.param.bottom,
-                  stride.horizontal, stride.vertical, activation, ofm_tensor);
+                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+                  activation, ofm_tensor);
 
     _return_fn = std::move(fn);
     return;
@@ -221,11 +301,12 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   const auto ker_width = ker_shape.dim(2);
 
   const auto padding =
-      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                           dilation.width_factor, dilation.height_factor);
 
   fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                 padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
-                activation, ofm_tensor);
+                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
 
   _return_fn = std::move(fn);
 }
 
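Dilation enlarges a filter's receptive field without adding weights, and the padding computation has to account for it: a k-tap filter with dilation d spans an effective window of (k - 1) * d + 1 input elements. A hedged sketch of the arithmetic a helper like ir::calculatePadding presumably performs for SAME padding (standard formulas, not the verbatim onert implementation):

// Effective kernel extent once dilation is applied.
int effective_filter_size(int kernel, int dilation) { return (kernel - 1) * dilation + 1; }

// Total SAME padding along one axis, clamped at zero; callers split it
// between the two sides.
int same_padding_total(int in, int out, int stride, int kernel, int dilation)
{
  int total = (out - 1) * stride + effective_filter_size(kernel, dilation) - in;
  return total > 0 ? total : 0;
}
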
@@ -251,10 +332,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto multiplier = node.param().multiplier;
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-  auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
-  auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
 
   auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
 
@@ -265,57 +346,6 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
-  const auto kh = node.param().kh;
-  const auto kw = node.param().kw;
-
-  const auto stride = node.param().stride;
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-  const auto padding =
-      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::MaxPoolLayer>();
-
-  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
-                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
-  const auto kh = node.param().kh;
-  const auto kw = node.param().kw;
-  const auto stride = node.param().stride;
-  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
-  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
-  const auto padding =
-      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
-  auto fn = std::make_unique<ops::AvgPoolLayer>();
-
-  fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
-                stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
-  _return_fn = std::move(fn);
-}
-
 void KernelGenerator::visit(const ir::operation::Concat &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -323,11 +353,11 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
   const auto rank = _ctx.at(ofm_index).shape().rank();
   const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
 
-  auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
 
   std::vector<const IPortableTensor *> input_tensors;
   for (auto &ifm_idx : node.getInputs())
-    input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
 
   auto fn = std::make_unique<ops::ConcatLayer>();
 
@@ -342,9 +372,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
   const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
 
-  auto output_alloc = _tensor_builder->portableAt(output_index).get();
-  auto input_alloc = _tensor_builder->portableAt(input_index).get();
-  auto block_size_alloc = _tensor_builder->portableAt(block_size_index).get();
+  auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
+  auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
 
   auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
 
@@ -354,7 +384,7 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
   if (node.getInputs().size() != NNApiInputs)
   {
     const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
-    crops_alloc = _tensor_builder->portableAt(crops_data_index).get();
+    crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
   }
 
   fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
@@ -368,9 +398,9 @@ void KernelGenerator::visit(const ir::operation::Fill &node)
   const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
   const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto value_tensor = _tensor_builder->portableAt(value_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
 
   auto fn = std::make_unique<ops::FillLayer>();
 
@@ -389,11 +419,11 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
   const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
   const auto activation = node.param().activation;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
-  auto weight_tensor = _tensor_builder->portableAt(weight_index).get();
-  auto bias_tensor =
-      bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
+  auto bias_tensor =
+      bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
 
   auto fn = std::make_unique<ops::FullyConnectedLayer>();
 
@@ -408,8 +438,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   // optional 2nd input
   IPortableTensor *shape_tensor = nullptr;
 
   if (node.getInputs().size() == 2)
   {
     const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
-    shape_tensor = _tensor_builder->portableAt(shape_index).get();
+    shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
   }
 
   auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -431,8 +461,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
   const auto output_index{node.getOutputs().at(0)};
   const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   // Squeeze can share the same kernel with Reshape
   auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -449,8 +479,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
 
   const auto beta = node.param().beta;
 
-  auto output_tensor = _tensor_builder->portableAt(output_index).get();
-  auto input_tensor = _tensor_builder->portableAt(input_index).get();
+  auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
 
   auto fn = std::make_unique<ops::SoftMaxLayer>();
 
@@ -459,21 +489,22 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
   _return_fn = std::move(fn);
 }
 
-void KernelGenerator::visit(const ir::operation::Add &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+  const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
 
   const auto activation = node.param().activation;
 
-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
 
-  auto fn = std::make_unique<ops::AddLayer>();
+  auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
 
-  fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
+  fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
+                convertArithmeticType(node.param().arithmetic_type));
 
   _return_fn = std::move(fn);
 }
 
+515,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node) const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)}; const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); auto comparison_type = node.param().comparison_type; @@ -503,9 +534,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto indices_tensor = _tensor_builder->portableAt(indices_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get(); const auto backend_layout = output_tensor->layout(); UNUSED_RELEASE(backend_layout); @@ -534,46 +565,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Sub &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::SubLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Mul &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MulLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::OneHot &node) { const auto output_index{node.getOutputs().at(0)}; @@ -584,11 +575,11 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto indices_tensor = _tensor_builder->portableAt(indices_index).get(); - auto depth_tensor = _tensor_builder->portableAt(depth_index).get(); - auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get(); - auto offvalue_tensor = 
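Every visitor in this file repeats one substitution: tensors are looked up through the shared registry (getPortableTensor) instead of through the TensorBuilder (portableAt). A minimal stand-in for the registry's role as the single lookup point shared by the builder, the constant initializer and the kernel generator (the registration method name here is hypothetical):

#include <memory>
#include <unordered_map>

struct IPortableTensor { virtual ~IPortableTensor() = default; }; // stand-in type
using OperandIndex = unsigned;                                    // stand-in type

class ToyTensorRegistry
{
public:
  // Mirrors the accessor used throughout these hunks; returns nullptr when absent.
  std::shared_ptr<IPortableTensor> getPortableTensor(OperandIndex ind) const
  {
    auto it = _tensors.find(ind);
    return it == _tensors.end() ? nullptr : it->second;
  }
  // Hypothetical registration entry point, for illustration only.
  void registerTensor(OperandIndex ind, std::shared_ptr<IPortableTensor> tensor)
  {
    _tensors[ind] = std::move(tensor);
  }

private:
  std::unordered_map<OperandIndex, std::shared_ptr<IPortableTensor>> _tensors;
};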
_tensor_builder->portableAt(offvalue_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get(); + auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get(); + auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get(); + auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get(); assert(indices_tensor->data_type() == OperandType::INT32); assert(axis <= static_cast<int>(indices_tensor->num_dimensions())); @@ -600,34 +591,14 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Div &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::DivLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Einsum &node) { const auto ofm_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); const auto equation = node.param().equation; @@ -648,7 +619,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node) const auto &operand = _ctx.at(idx); // TODO make sure using `_current_op_seq_layout` is correct for custom operations types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()}); - auto in_tensor = _tensor_builder->portableAt(idx); + auto in_tensor = _tensor_reg->getPortableTensor(idx); tensors.emplace_back(in_tensor); } }; @@ -666,64 +637,68 @@ void KernelGenerator::visit(const ir::operation::Custom &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Exp &node) +void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); - auto fn = std::make_unique<ops::ExpLayer>(); + auto fn = std::make_unique<ops::ElementwiseActivationLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta, + convertElementwiseActivationType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ExpandDims &node) +void KernelGenerator::visit(const 
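The deleted Exp and Logistic visitors (and Tanh just below) come back as one ElementwiseActivation visitor that forwards alpha, beta and op_type into a single layer. A sketch of that binding step with stand-in types; treating alpha/beta as the clamp bounds of the ReLU family is an assumption here, suggested by ReLU6 disappearing as a separate op:

#include <algorithm>
#include <cmath>
#include <functional>

enum class ActivationType { kReLU, kTanh, kLogistic }; // assumed variants

std::function<float(float)> makeActivation(ActivationType op, float alpha, float beta)
{
  switch (op)
  {
    case ActivationType::kReLU:
      // Assumption: alpha = upper bound (e.g. 6 for the former ReLU6), beta = lower bound.
      return [alpha, beta](float x) { return std::min(std::max(x, beta), alpha); };
    case ActivationType::kTanh:
      return [](float x) { return std::tanh(x); };
    case ActivationType::kLogistic:
      return [](float x) { return 1.f / (1.f + std::exp(-x)); };
  }
  return {};
}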
ir::operation::ElementwiseBinary &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axis_tensor = _tensor_builder->portableAt(axis_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); - auto fn = std::make_unique<ops::ExpandDimsLayer>(); + auto fn = std::make_unique<ops::ElementwiseBinaryLayer>(); - fn->configure(input_tensor, axis_tensor, output_tensor); + fn->configure(lhs_tensor, rhs_tensor, output_tensor, + convertElementwiseBinaryType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Logistic &node) +void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); - auto fn = std::make_unique<ops::LogisticLayer>(); + auto fn = std::make_unique<ops::ElementwiseUnaryLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Tanh &node) +void KernelGenerator::visit(const ir::operation::ExpandDims &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get(); - auto fn = std::make_unique<ops::TanhLayer>(); + auto fn = std::make_unique<ops::ExpandDimsLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, axis_tensor, output_tensor); _return_fn = std::move(fn); } @@ -737,11 +712,11 @@ void KernelGenerator::visit(const ir::operation::Pack &node) assert(-rank <= axis && axis < rank); - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - 
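ElementwiseBinary, configured here through convertElementwiseBinaryType, plays the same consolidating role for the unfused binary ops: the dedicated Max, Min and LogicalOr layers deleted later in this file all become variants of one layer. A stand-in sketch of the type-keyed kernel selection:

#include <algorithm>
#include <functional>

enum class ElementwiseBinaryType { kMax, kMin, kLogicalOr }; // assumed variants

std::function<float(float, float)> selectBinaryKernel(ElementwiseBinaryType op)
{
  switch (op)
  {
    case ElementwiseBinaryType::kMax:
      return [](float a, float b) { return std::max(a, b); };
    case ElementwiseBinaryType::kMin:
      return [](float a, float b) { return std::min(a, b); };
    case ElementwiseBinaryType::kLogicalOr: // booleans modeled as 0/1 floats here
      return [](float a, float b) { return (a != 0.f || b != 0.f) ? 1.f : 0.f; };
  }
  return {};
}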
input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); auto fn = std::make_unique<ops::PackLayer>(); @@ -759,11 +734,11 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) assert(rank == 0 || (-rank <= axis && axis < rank)); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); std::vector<IPortableTensor *> output_tensors; for (auto &output_idx : node.getOutputs()) - output_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::UnpackLayer>(); @@ -781,8 +756,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node) const auto output_index{node.getOutputs().at(0)}; assert(_ctx.at(pad_index).data()); - auto input = _tensor_builder->portableAt(input_index).get(); - auto output = _tensor_builder->portableAt(output_index).get(); + auto input = _tensor_reg->getPortableTensor(input_index).get(); + auto output = _tensor_reg->getPortableTensor(output_index).get(); auto pad_rank = _ctx.at(pad_index).shape().dim(0); auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base()); @@ -801,62 +776,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Max &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MaxLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Min &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MinLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Cast &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::CastLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Transpose &node) { const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto 
input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::TransposeLayer>(); @@ -872,9 +798,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)}; const auto keep_dims = node.param().keep_dims; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axes_tensor = _tensor_builder->portableAt(axes_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get(); if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN) { @@ -895,36 +821,6 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) } } -void KernelGenerator::visit(const ir::operation::ReLU &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(0)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::ReLULayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU6 &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(0)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::ReLU6Layer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Select &node) { const auto output_index{node.getOutputs().at(0)}; @@ -932,10 +828,10 @@ void KernelGenerator::visit(const ir::operation::Select &node) const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto condition_tensor = _tensor_builder->portableAt(condition_index).get(); - auto true_tensor = _tensor_builder->portableAt(true_index).get(); - auto false_tensor = _tensor_builder->portableAt(false_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get(); + auto true_tensor = _tensor_reg->getPortableTensor(true_index).get(); + auto false_tensor = _tensor_reg->getPortableTensor(false_index).get(); auto fn = std::make_unique<ops::SelectLayer>(); @@ -951,10 +847,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node) const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto begins_tensor = _tensor_builder->portableAt(begins_index).get(); - auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto begins_tensor = 
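The Reduce visitor keeps a dedicated path for ReduceType::MEAN and sends every other variant through one generic kernel selected by reduce_type. A toy, single-axis illustration of that split (assumed subset of reduce variants):

#include <algorithm>
#include <numeric>
#include <stdexcept>
#include <vector>

enum class ReduceType { kMean, kSum, kMax }; // assumed subset

float reduceAll(ReduceType type, const std::vector<float> &data)
{
  if (data.empty()) throw std::runtime_error{"Reduce: empty input"};
  switch (type)
  {
    case ReduceType::kMean: // dedicated path, as in the visitor above
      return std::accumulate(data.begin(), data.end(), 0.f) / data.size();
    case ReduceType::kSum:
      return std::accumulate(data.begin(), data.end(), 0.f);
    case ReduceType::kMax:
      return *std::max_element(data.begin(), data.end());
  }
  throw std::runtime_error{"Reduce: unsupported type"};
}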
_tensor_reg->getPortableTensor(begins_index).get(); + auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get(); auto fn = std::make_unique<ops::SliceLayer>(); @@ -971,11 +867,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto starts_tensor = _tensor_builder->portableAt(starts_index).get(); - auto ends_tensor = _tensor_builder->portableAt(ends_index).get(); - auto strides_tensor = _tensor_builder->portableAt(strides_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get(); + auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get(); + auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get(); auto begin_mask = node.param().begin_mask; auto end_mask = node.param().end_mask; @@ -999,11 +895,11 @@ void KernelGenerator::visit(const ir::operation::Split &node) const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); auto axis_resolved = axis < 0 ? axis + rank : axis; - auto in_tensor = _tensor_builder->portableAt(input_idx).get(); + auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get(); std::vector<IPortableTensor *> out_tensors; for (auto &output_idx : node.getOutputs()) - out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::SplitLayer>(); @@ -1012,73 +908,13 @@ void KernelGenerator::visit(const ir::operation::Split &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::AbsLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Sin &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::SinLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Cos &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::CosLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::RSQRT &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto 
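Abs, Sin, Cos and RSQRT lose their one-off layer classes in this stretch; under the new scheme they are all instances of ElementwiseUnary (see the visitor earlier in the file). A compact table-driven sketch of that consolidation, with stand-in types:

#include <cmath>
#include <functional>
#include <map>

enum class ElementwiseUnaryType { kAbs, kSin, kCos, kRSqrt }; // assumed variants

// One lookup table replaces four layer classes; in the real code the dispatch
// happens in ops::ElementwiseUnaryLayer::configure via convertElementwiseUnaryType.
const std::map<ElementwiseUnaryType, std::function<float(float)>> kUnaryKernels = {
    {ElementwiseUnaryType::kAbs, [](float x) { return std::fabs(x); }},
    {ElementwiseUnaryType::kSin, [](float x) { return std::sin(x); }},
    {ElementwiseUnaryType::kCos, [](float x) { return std::cos(x); }},
    {ElementwiseUnaryType::kRSqrt, [](float x) { return 1.f / std::sqrt(x); }},
};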
ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::RsqrtLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Shape &node) { const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); auto fn = std::make_unique<ops::ShapeLayer>(); @@ -1097,8 +933,8 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) auto align_corners = node.param().align_corners; auto half_pixel_centers = node.param().half_pixel_centers; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::ResizeBilinearLayer>(); @@ -1114,9 +950,9 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)}; const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axis_tensor = _tensor_builder->portableAt(axis_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get(); auto fn = std::make_unique<ops::ReverseLayer>(); @@ -1125,21 +961,6 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::NegLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::ArgMax &node) { const auto output_index{node.getOutputs().at(0)}; @@ -1147,8 +968,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::ArgMinMaxLayer>(); @@ -1157,81 +978,45 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Pow &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)}; - 
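The ResizeBilinear visitor forwards align_corners and half_pixel_centers straight to the layer; these flags select between the standard source-coordinate mappings for bilinear resize. A sketch of those transforms (the common definitions, not the cker implementation):

#include <cstdint>

// Maps an output coordinate to a fractional input coordinate for bilinear resize.
float sourceCoord(int32_t out_x, int32_t in_size, int32_t out_size,
                  bool align_corners, bool half_pixel_centers)
{
  if (half_pixel_centers) // sample at pixel centers
    return (out_x + 0.5f) * in_size / out_size - 0.5f;
  if (align_corners && out_size > 1) // corners of input and output coincide
    return out_x * static_cast<float>(in_size - 1) / (out_size - 1);
  return out_x * static_cast<float>(in_size) / out_size; // default scaling
}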
const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::PowLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Log &node) +void KernelGenerator::visit(const ir::operation::Pool2D &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::LogLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} + const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)}; -void KernelGenerator::visit(const ir::operation::Round &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)}; + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto padding = + ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); - auto fn = std::make_unique<ops::RoundLayer>(); + auto fn = std::make_unique<ops::PoolLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom, + stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor, + convertPoolType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::LogicalNot &node) +void KernelGenerator::visit(const ir::operation::Pow &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::LogicalNotLayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(0)}; - const auto rhs_index{node.getInputs().at(1)}; + const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = 
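The new Pool2D visitor derives explicit left/right/top/bottom padding from the padding mode before configuring PoolLayer. A simplified sketch of the usual SAME-padding arithmetic behind a helper like ir::calculatePadding (the real helper also handles VALID and explicit padding):

#include <algorithm>
#include <cstdint>

struct ExplicitPadding { int32_t left, right, top, bottom; };

// SAME padding: the output spans ceil(in / stride); any padding needed to make
// the window fit is split with the extra pixel on the right/bottom edge.
ExplicitPadding samePadding(int32_t in_h, int32_t in_w, int32_t stride_h, int32_t stride_w,
                            int32_t kh, int32_t kw)
{
  const int32_t out_h = (in_h + stride_h - 1) / stride_h;
  const int32_t out_w = (in_w + stride_w - 1) / stride_w;
  const int32_t pad_h = std::max(0, (out_h - 1) * stride_h + kh - in_h);
  const int32_t pad_w = std::max(0, (out_w - 1) * stride_w + kw - in_w);
  return {pad_w / 2, pad_w - pad_w / 2, pad_h / 2, pad_h - pad_h / 2};
}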
_tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); - auto fn = std::make_unique<ops::LogicalOrLayer>(); + auto fn = std::make_unique<ops::PowLayer>(); - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); + fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor); _return_fn = std::move(fn); } @@ -1241,8 +1026,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(0)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_alloc = _tensor_reg->getPortableTensor(output_index).get(); + auto input_alloc = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::L2NormLayer>(); @@ -1251,35 +1036,36 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ZerosLike &node) +void KernelGenerator::visit(const ir::operation::Range &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)}; + const auto start_index{node.getInputs().at(ir::operation::Range::START)}; + const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)}; + const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto start_tensor = _tensor_reg->getPortableTensor(start_index).get(); + auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get(); + auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get(); - auto fn = std::make_unique<ops::ZerosLikeLayer>(); + auto fn = std::make_unique<ops::RangeLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Range &node) +void KernelGenerator::visit(const ir::operation::Rank &node) { - const auto output_index{node.getOutputs().at(0)}; - const auto start_index{node.getInputs().at(ir::operation::Range::START)}; - const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)}; - const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)}; + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto start_tensor = _tensor_builder->portableAt(start_index).get(); - auto limit_tensor = _tensor_builder->portableAt(limit_index).get(); - auto delta_tensor = _tensor_builder->portableAt(delta_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); - auto fn = std::make_unique<ops::RangeLayer>(); + auto fn = std::make_unique<ops::RankLayer>(); + + fn->configure(ifm_tensor, ofm_tensor); - fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1289,9 +1075,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto 
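Range and Rank gain visitors in this stretch. RangeLayer takes START, LIMIT and DELTA as tensors; semantically the op produces the usual half-open range. A minimal float sketch of that contract:

#include <stdexcept>
#include <vector>

std::vector<float> rangeOp(float start, float limit, float delta)
{
  if (delta == 0.f) throw std::runtime_error{"Range: delta must be non-zero"};
  std::vector<float> out;
  // Walk toward limit without ever including it, in either direction.
  for (float v = start; (delta > 0.f) ? (v < limit) : (v > limit); v += delta)
    out.push_back(v);
  return out;
}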
lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); auto fn = std::make_unique<ops::SqDiffLayer>(); @@ -1305,9 +1091,9 @@ void KernelGenerator::visit(const ir::operation::Tile &node) const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)}; const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get(); auto fn = std::make_unique<ops::TileLayer>(); @@ -1322,10 +1108,10 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node) const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)}; const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get(); - auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get(); + auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get(); auto fn = std::make_unique<ops::MatrixBandPartLayer>(); @@ -1339,9 +1125,9 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node) const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); const auto adj_x = node.param().adj_x; const auto adj_y = node.param().adj_y; @@ -1358,9 +1144,9 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node) const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)}; const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto shape_tensor = _tensor_builder->portableAt(shape_index).get(); + auto output_tensor = 
_tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get(); auto fn = std::make_unique<ops::BroadcastToLayer>(); @@ -1373,10 +1159,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node) { const auto ofm_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); const auto epsilon = node.param().epsilon; const auto is_training = node.param().is_training; @@ -1397,8 +1183,8 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node) const auto beta = node.param().beta; const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::LogSoftMaxLayer>(); @@ -1414,10 +1200,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)}; const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get(); - auto padding_tensor = _tensor_builder->portableAt(padding_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get(); + auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get(); auto fn = std::make_unique<ops::SpaceToBatchNDLayer>(); @@ -1426,29 +1212,14 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Quantize &node) -{ - const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)}; - const auto output_index{node.getOutputs().at(0)}; - - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - - auto fn = std::make_unique<ops::QuantizeLayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) { const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; const auto output_index{node.getOutputs().at(0)}; auto block_size = node.param().block_size; - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); auto fn = std::make_unique<ops::SpaceToDepthLayer>(); @@ -1462,9 +1233,9 @@ 
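LogSoftmax passes beta and axis through to LogSoftMaxLayer. Numerically the op is log_softmax(x)_i = beta*x_i - log(sum_j exp(beta*x_j)), computed stably by shifting by the maximum first. A single-axis float sketch (assumes a non-empty input):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> logSoftmax(const std::vector<float> &x, float beta)
{
  const float max_in = *std::max_element(x.begin(), x.end());
  float sum = 0.f;
  for (float v : x) sum += std::exp((v - max_in) * beta); // shifted for stability
  const float log_sum = std::log(sum);
  std::vector<float> out(x.size());
  for (std::size_t i = 0; i < x.size(); ++i)
    out[i] = (x[i] - max_in) * beta - log_sum;
  return out;
}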
void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node) const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)}; const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto shape_alloc = _tensor_builder->portableAt(shape_index).get(); - auto seed_alloc = _tensor_builder->portableAt(seed_index).get(); + auto output_alloc = _tensor_reg->getPortableTensor(output_index).get(); + auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get(); + auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get(); auto fn = std::make_unique<ops::StatelessRandomUniformLayer>(); @@ -1481,13 +1252,13 @@ void KernelGenerator::visit(const ir::operation::SplitV &node) const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)}; const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)}; - auto in_tensor = _tensor_builder->portableAt(input_idx).get(); - auto in_size_splits = _tensor_builder->portableAt(size_splits).get(); - auto in_split_dim = _tensor_builder->portableAt(split_dim).get(); + auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get(); + auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get(); + auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get(); std::vector<IPortableTensor *> out_tensors; for (auto &output_idx : node.getOutputs()) - out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::SplitVLayer>(); diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index 40c056a96..786e68ee0 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -19,6 +19,7 @@ #include "ExternalContext.h" #include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" #include "Tensor.h" #include <backend/CustomKernelBuilder.h> @@ -38,6 +39,7 @@ class KernelGenerator : public IKernelGenerator public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, const std::shared_ptr<ExternalContext> &external_context); @@ -46,8 +48,6 @@ public: void visit(const ir::OpSequence &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::MaxPool2D &) override; - void visit(const ir::operation::AvgPool2D &) override; void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Fill &) override; void visit(const ir::operation::FullyConnected &) override; @@ -55,51 +55,35 @@ public: void visit(const ir::operation::Squeeze &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::Add &) override; - void visit(const ir::operation::Sub &) override; - void visit(const ir::operation::Mul &) override; - void visit(const ir::operation::Div &) override; + void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Einsum &) override; void visit(const ir::operation::Gather &) 
override; void visit(const ir::operation::Custom &node) override; - void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::ElementwiseActivation &) override; + void visit(const ir::operation::ElementwiseBinary &) override; + void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; - void visit(const ir::operation::Logistic &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Max &) override; - void visit(const ir::operation::Min &) override; - void visit(const ir::operation::Tanh &) override; void visit(const ir::operation::Pack &) override; void visit(const ir::operation::Unpack &) override; void visit(const ir::operation::OneHot &) override; - void visit(const ir::operation::Cast &) override; void visit(const ir::operation::Transpose &) override; void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::ReLU &) override; - void visit(const ir::operation::ReLU6 &) override; void visit(const ir::operation::Select &) override; void visit(const ir::operation::Slice &) override; void visit(const ir::operation::StridedSlice &) override; void visit(const ir::operation::Split &) override; - void visit(const ir::operation::Abs &) override; - void visit(const ir::operation::Cos &) override; - void visit(const ir::operation::Sin &) override; - void visit(const ir::operation::RSQRT &) override; void visit(const ir::operation::Shape &) override; void visit(const ir::operation::ResizeBilinear &node) override; void visit(const ir::operation::Reverse &) override; - void visit(const ir::operation::Neg &) override; void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::Log &) override; - void visit(const ir::operation::Round &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Pow &) override; - void visit(const ir::operation::LogicalNot &) override; - void visit(const ir::operation::ZerosLike &) override; void visit(const ir::operation::SquaredDifference &) override; void visit(const ir::operation::Tile &) override; - void visit(const ir::operation::LogicalOr &) override; void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::Range &) override; + void visit(const ir::operation::Rank &) override; void visit(const ir::operation::MatrixBandPart &) override; void visit(const ir::operation::BatchMatMul &) override; void visit(const ir::operation::BatchToSpaceND &) override; @@ -107,7 +91,6 @@ public: void visit(const ir::operation::FusedBatchNorm &) override; void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; - void visit(const ir::operation::Quantize &) override; void visit(const ir::operation::SpaceToDepth &) override; void visit(const ir::operation::StatelessRandomUniform &) override; void visit(const ir::operation::SplitV &) override; @@ -116,6 +99,7 @@ private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; ir::Layout _current_op_seq_layout; const std::shared_ptr<ExternalContext> _external_context; diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc index ab8ba5756..828d52f7c 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.cc +++ 
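Taken together, the header changes mean a cpu backend now builds its pieces around one shared cpu_common::TensorRegistry. A wiring sketch matching the new constructor signatures (a fragment, not a complete program; operands, operations, kernel_builder and external_context are assumed to be in scope):

// Shared registry first, then the builder and kernel generator that consume it.
auto tensor_reg = std::make_shared<cpu_common::TensorRegistry>();
auto tensor_builder = std::make_shared<TensorBuilder>(tensor_reg);
auto kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tensor_builder,
                                                    tensor_reg, kernel_builder, external_context);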
b/runtime/onert/backend/cpu/TensorBuilder.cc @@ -27,8 +27,8 @@ namespace backend namespace cpu { -TensorBuilder::TensorBuilder() - : _tensor_reg{new cpu_common::TensorRegistry()}, +TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} { @@ -57,7 +57,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); const auto tensor_info = _tensor_info_map.at(ind); - if (!at(ind)->is_dynamic()) + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) { const auto size = tensor_info.total_size(); _static_tensor_mgr->claimPlan(ind, size); @@ -66,7 +66,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { - if (!at(ind)->is_dynamic()) + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) { _static_tensor_mgr->releasePlan(ind); } @@ -85,29 +85,6 @@ void TensorBuilder::allocate() // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. } -std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind) -{ - return _tensor_reg->getITensor(ind); -} - -std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandIndex &ind) -{ - return _tensor_reg->getPortableTensor(ind); -} - -bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) -{ - return _tensor_reg->setMigrantTensor(ind, tensor); -} - -void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); } - -std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind) -{ - return _tensor_reg->getNativeTensor(ind); -} - std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void) { return std::move(_static_tensor_mgr); diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h index 617136514..b6d5f09cc 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.h +++ b/runtime/onert/backend/cpu/TensorBuilder.h @@ -38,9 +38,7 @@ namespace cpu class TensorBuilder : public ITensorBuilder { public: - TensorBuilder(); - - bool supportDynamicTensor() override { return true; } + TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); /** * @brief Register tensor information to allocate on CPU backend @@ -60,34 +58,12 @@ public: void allocate() override; void postFunctionPrepare() override { /* DO NOTHING */} - /** - * @brief Get tensor with a specific OperandIndex - * - * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise. - */ - std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override; - - void iterate(const IterateFunction &fn) override; - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override; - /** - * @brief Get tensor with a specific OperandIndex. - * @param ind OperandIndex for the tensor. There must exist a tensor with this ind. - * If not, program will crash with assert or exception. 
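notifyFirstUse and notifyLastUse now consult the registry directly, but the protocol is unchanged: a static (non-dynamic) tensor claims a memory plan at its first use and releases it at its last use, which lets the static planner overlap non-conflicting lifetimes. A toy planner showing the bookkeeping this enables (a stand-in for the StaticTensorManager side, not the real interface):

#include <algorithm>
#include <cstddef>
#include <unordered_map>

class ToyStaticPlanner
{
public:
  void claimPlan(unsigned operand, std::size_t size) // first use
  {
    _live[operand] = size;
    _peak = std::max(_peak, liveBytes());
  }
  void releasePlan(unsigned operand) { _live.erase(operand); } // last use

  std::size_t liveBytes() const
  {
    std::size_t total = 0;
    for (const auto &entry : _live) total += entry.second;
    return total;
  }
  std::size_t peakBytes() const { return _peak; } // what a packing planner must fit

private:
  std::unordered_map<unsigned, std::size_t> _live;
  std::size_t _peak = 0;
};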
- * @return shared_ptr<Tensor> - */ - std::shared_ptr<Tensor> at(const ir::OperandIndex &ind); - std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind); - bool setMigrantTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) override; - - std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; } - private: const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.cc b/runtime/onert/backend/cpu/ops/AbsLayer.cc deleted file mode 100644 index 322785aeb..000000000 --- a/runtime/onert/backend/cpu/ops/AbsLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "AbsLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -AbsLayer::AbsLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void AbsLayer::absFloat32() -{ - nnfw::cker::Abs(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void AbsLayer::absQuant8() { throw std::runtime_error{"NYI"}; } - -void AbsLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void AbsLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - absFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - absQuant8(); - } - else - { - throw std::runtime_error{"Abs: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.h b/runtime/onert/backend/cpu/ops/AbsLayer.h deleted file mode 100644 index feb5f35ae..000000000 --- a/runtime/onert/backend/cpu/ops/AbsLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ - -#include "backend/IPortableTensor.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AbsLayer : public ::onert::exec::IFunction -{ -public: - AbsLayer(); - -public: - void absFloat32(); - - void absQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/AddLayer.cc b/runtime/onert/backend/cpu/ops/AddLayer.cc deleted file mode 100644 index 379215303..000000000 --- a/runtime/onert/backend/cpu/ops/AddLayer.cc +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "AddLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void AddLayer::addFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void AddLayer::addInt32() -{ - int32_t output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), 
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); -} - -void AddLayer::addQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - // Parameters for scaled quantized computation - op_params.left_shift = 20; - // Zero-points of input and output tensors - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255)); - assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255)); - assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255)); - - // Compute normalized scale for _lhs and _rhs values, - // and represent in 32-bit fixed point - const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale()); - const double real_lhs_scale = _lhs->data_scale() / norm_max_scale; - const double real_rhs_scale = _rhs->data_scale() / norm_max_scale; - // output scale is used to normalize final result, so we invert the scale here - const double real_output_scale = - norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift)); - - // Represent the scales as fixed int32_t multipliers, and int32_t shifts - QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift); - QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift); - QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift); - - // cker quant8 add is not implemented yet - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void AddLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void AddLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - addFloat32(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { 
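The deleted addQuant8 is the standard scaled-integer formulation of quantized addition: both inputs are normalized by twice the larger input scale, given 20 bits of left-shift headroom, and the output scale folds the normalization back out before QuantizeMultiplier turns each real scale into a fixed-point multiplier/shift pair. A sketch of just the real-valued scale preparation (the fixed-point conversion stays in cker):

#include <algorithm>

struct QuantizedAddScales { double lhs, rhs, output; };

QuantizedAddScales prepareAddScales(double lhs_scale, double rhs_scale, double output_scale,
                                    int left_shift /* 20 in the deleted code */)
{
  // Normalize both inputs against twice the larger input scale...
  const double norm_max_scale = 2 * std::max(lhs_scale, rhs_scale);
  // ...and invert that normalization (plus the headroom shift) through the output scale.
  return {lhs_scale / norm_max_scale, rhs_scale / norm_max_scale,
          norm_max_scale / (output_scale * (1 << left_shift))};
}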
- addQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - addInt32(); - } - else - { - throw std::runtime_error{"Add: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AddLayer.h b/runtime/onert/backend/cpu/ops/AddLayer.h deleted file mode 100644 index 91030d93a..000000000 --- a/runtime/onert/backend/cpu/ops/AddLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AddLayer : public ::onert::exec::IFunction -{ -public: - AddLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void addFloat32(); - - void addQuant8(); - - void addInt32(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc b/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc deleted file mode 100644 index 9c22c1c86..000000000 --- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "AvgPoolLayer.h" - -#include <cker/operation/AveragePool.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -#define AVGPOOLING_PARAMETERS \ - nnfw::cker::PoolParams op_params; \ - op_params.stride_height = _strideHeight; \ - op_params.stride_width = _strideWidth; \ - op_params.filter_height = _kernelHeight; \ - op_params.filter_width = _kernelWidth; \ - op_params.padding_values.height = (int8_t)_paddingTop; \ - op_params.padding_values.width = (int8_t)_paddingLeft; - -AvgPoolLayer::AvgPoolLayer() - : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0), - _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0), - _activation(ir::Activation::NONE) -{ - // DO NOTHING -} - -void AvgPoolLayer::averagePoolFloat32() -{ - AVGPOOLING_PARAMETERS - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - nnfw::cker::AveragePool(op_params, getTensorShape(_input), - reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} -void AvgPoolLayer::averagePoolQuant8() -{ - AVGPOOLING_PARAMETERS - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - nnfw::cker::AveragePool(op_params, getTensorShape(_input), - reinterpret_cast<const uint8_t *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void AvgPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output) -{ - assert(input != nullptr); - assert(output != nullptr); - - _input = input; - _paddingLeft = paddingLeft; - _paddingRight = paddingRight; - _paddingTop = paddingTop; - _paddingBottom = paddingBottom; - _strideWidth = strideWidth; - _strideHeight = strideHeight; - _kernelWidth = kernelWidth; - _kernelHeight = kernelHeight; - _activation = activation; - _output = output; -} - -void AvgPoolLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - averagePoolFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - averagePoolQuant8(); - } - else - { - throw std::runtime_error{"AvgPool: unsupported data type"}; - } -} - -#undef AVGPOOLING_PARAMETERS - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h b/runtime/onert/backend/cpu/ops/AvgPoolLayer.h deleted file mode 100644 index d4e8f79e7..000000000 --- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AvgPoolLayer : public ::onert::exec::IFunction -{ -public: - AvgPoolLayer(); - -public: - void averagePoolFloat32(); - - void averagePoolQuant8(); - - void configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; - - uint32_t _paddingLeft; - uint32_t _paddingTop; - uint32_t _paddingRight; - uint32_t _paddingBottom; - - uint32_t _strideWidth; - uint32_t _strideHeight; - uint32_t _kernelWidth; - uint32_t _kernelHeight; - - ir::Activation _activation; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc new file mode 100644 index 000000000..f50c63375 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "BinaryArithmeticLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T>
+void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+          nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+  const bool need_broadcast =
+      nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params);
+  if (need_broadcast)
+  {
+    nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
+        op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+        getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+        reinterpret_cast<T *>(output->buffer()));
+    return;
+  }
+
+  nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
+      op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+      getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+      reinterpret_cast<T *>(output->buffer()));
+}
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type>
+std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activation,
+                      nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+  switch (lhs->data_type())
+  {
+    case OperandType::FLOAT32:
+    {
+      float output_activation_min = 0, output_activation_max = 0;
+      CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+      op_params.float_activation_max = output_activation_max;
+      op_params.float_activation_min = output_activation_min;
+      return std::bind(&eval<arithmetic_type, float>, std::placeholders::_1, std::placeholders::_2,
+                       std::placeholders::_3, op_params);
+    }
+    case OperandType::INT32:
+    {
+      int32_t output_activation_min = 0, output_activation_max = 0;
+      CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+      op_params.quantized_activation_max = output_activation_max;
+      op_params.quantized_activation_min = output_activation_min;
+      return std::bind(&eval<arithmetic_type, int32_t>, std::placeholders::_1,
+                       std::placeholders::_2, std::placeholders::_3, op_params);
+    }
+    default:
+      throw std::runtime_error{"BinaryArithmetic(generic): Unsupported data type"};
+  }
+}
+
+void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                             IPortableTensor *output, ir::Activation activation,
+                             nnfw::cker::BinaryArithmeticOpParam *params)
+{
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(activation, output, &output_activation_min,
+                                &output_activation_max);
+  nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.quantized_activation_min = output_activation_min;
+  // Parameters for scaled quantized computation
+  op_params.left_shift = 20;
+  // Zero-points of input and output tensors
+  op_params.input1_offset = -lhs->data_offset();
+  op_params.input2_offset = -rhs->data_offset();
+  op_params.output_offset = output->data_offset();
+  assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
+  assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
+  assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
+
+  // Compute normalized scale for lhs and rhs values,
+  // and represent in
+  // and represent in 32-bit fixed point
+  const double norm_max_scale = 2 * std::max(lhs->data_scale(), rhs->data_scale());
+  const double real_lhs_scale = lhs->data_scale() / norm_max_scale;
+  const double real_rhs_scale = rhs->data_scale() / norm_max_scale;
+  // output scale is used to normalize final result, so we invert the scale here
+  const double real_output_scale =
+      norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
+
+  // Represent the scales as fixed int32_t multipliers, and int32_t shifts
+  QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
+  QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
+  QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+void setMulQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                        IPortableTensor *output, ir::Activation activation,
+                        nnfw::cker::BinaryArithmeticOpParam *params)
+{
+  int32_t output_activation_min, output_activation_max;
+  CalculateActivationRangeUint8(activation, output, &output_activation_min,
+                                &output_activation_max);
+  nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.input1_offset = -lhs->data_offset();
+  op_params.input2_offset = -rhs->data_offset();
+  op_params.output_offset = output->data_offset();
+
+  double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale();
+  QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+} // namespace
+
+void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                                      IPortableTensor *output, const ir::Activation activation,
+                                      const ArithmeticType arithmetic_type)
+{
+  assert(lhs != nullptr);
+  assert(rhs != nullptr);
+  assert(output != nullptr);
+
+  _lhs = lhs;
+  _rhs = rhs;
+  _output = output;
+
+  nnfw::cker::BinaryArithmeticOpParam op_params;
+  switch (arithmetic_type)
+  {
+    case ArithmeticType::kAdd:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+        _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>,
+                            std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+                            op_params);
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    case ArithmeticType::kSub:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+        op_params.input2_multiplier *= -1;
+        _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>,
+                            std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+                            op_params);
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    case ArithmeticType::kMul:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+        _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>,
+                            std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+                            op_params);
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    case ArithmeticType::kDiv:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        throw std::runtime_error{
+            "BinaryArithmetic(Div): Div operation does not support quantization"};
+      }
+      else if (_lhs->data_type() == OperandType::INT32)
+      {
+        throw std::runtime_error{"BinaryArithmetic(Div): Unsupported data type"};
+      }
+      else
+      {
+        _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(_lhs, activation,
+                                                                                 op_params);
+      }
+      break;
+    default:
+      throw std::runtime_error{"BinaryArithmetic: Unsupported BinaryArithmetic type"};
+  }
+}
+
+void BinaryArithmeticLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
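The uint8 Add/Sub path configured above follows the usual fixed-point rescaling scheme: offset-corrected inputs are widened by left_shift = 20, scaled by per-input multipliers normalized against norm_max_scale, summed, and rescaled into the output's quantized domain. The sketch below redoes that arithmetic with plain doubles in place of the QuantizeMultiplier multiplier/shift pairs, to make the factoring visible; the function name and final clamp are illustrative, not part of onert.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Plain-double model of the uint8 Add parameters above (illustration only;
// the runtime turns each scale ratio into an int32 multiplier/shift pair).
uint8_t quantizedAdd(uint8_t lhs, uint8_t rhs, int32_t lhs_zero_point, double lhs_scale,
                     int32_t rhs_zero_point, double rhs_scale, int32_t out_zero_point,
                     double out_scale)
{
  const int left_shift = 20; // widen inputs so the scale ratios keep precision
  const double norm_max_scale = 2 * std::max(lhs_scale, rhs_scale);
  // Offset-corrected inputs, widened into the high-precision domain
  const int32_t shifted_lhs = (static_cast<int32_t>(lhs) - lhs_zero_point) * (1 << left_shift);
  const int32_t shifted_rhs = (static_cast<int32_t>(rhs) - rhs_zero_point) * (1 << left_shift);
  // input1/input2 multipliers: each input's scale normalized by norm_max_scale
  const double sum = shifted_lhs * (lhs_scale / norm_max_scale) +
                     shifted_rhs * (rhs_scale / norm_max_scale);
  // output multiplier: undo the normalization and the left shift, land on out_scale
  const double rescaled = sum * (norm_max_scale / (out_scale * (1 << left_shift)));
  const int32_t quantized = static_cast<int32_t>(std::round(rescaled)) + out_zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, quantized)));
}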
diff --git a/runtime/onert/backend/cpu/ops/DivLayer.h b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h
index 9411be76e..d6b33ad07 100644
--- a/runtime/onert/backend/cpu/ops/DivLayer.h
+++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
 
 #include <backend/IPortableTensor.h>
 #include "OperationUtils.h"
 
@@ -31,21 +31,25 @@ namespace cpu
 namespace ops
 {
 
-class DivLayer : public ::onert::exec::IFunction
+enum class ArithmeticType
+{
+  kAdd,
+  kSub,
+  kMul,
+  kDiv,
+};
+
+class BinaryArithmeticLayer : public ::onert::exec::IFunction
 {
 public:
-  DivLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+  BinaryArithmeticLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
   {
     // DO NOTHING
   }
 
 public:
-  void divFloat32();
-
-  void divQuant8();
-
-  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
-                 const ir::Activation activation, IPortableTensor *output);
+  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+                 const ir::Activation activation, const ArithmeticType arithmetic_type);
 
   void run() override;
 
@@ -54,7 +58,7 @@ private:
   const IPortableTensor *_rhs;
   IPortableTensor *_output;
 
-  ir::Activation _activation{ir::Activation::NONE};
+  std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
 };
 
 } // namespace ops
@@ -62,4 +66,4 @@
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/CastLayer.cc b/runtime/onert/backend/cpu/ops/CastLayer.cc
deleted file mode 100644
index 497515606..000000000
--- a/runtime/onert/backend/cpu/ops/CastLayer.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "CastLayer.h" - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -CastLayer::CastLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void CastLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -template <typename FromT, typename ToT> void CastLayer::castTensor(const FromT *in, ToT *out) -{ - auto input_shape = getTensorShape(_input); - auto output_shape = getTensorShape(_output); - const auto num_elements = MatchingFlatSize(input_shape, output_shape); - - std::transform(in, in + num_elements, out, [](FromT a) { return static_cast<ToT>(a); }); -} - -template <typename FromT> void CastLayer::castPtr(const FromT *in, DataPtr out) -{ - switch (_output->data_type()) - { - case ir::DataType::FLOAT32: - castTensor(in, out.f); - return; - case ir::DataType::INT32: - castTensor(in, out.i32); - return; - case ir::DataType::UINT32: - castTensor(in, out.u32); - return; - case ir::DataType::UINT8: - castTensor(in, out.u8); - return; - case ir::DataType::BOOL8: - castTensor(in, out.b); - return; - case ir::DataType::INT64: - castTensor(in, out.i64); - return; - default: - throw std::runtime_error("Not supported output type" + - std::to_string((int)_output->data_type())); - } -} - -void CastLayer::run() -{ - auto input_buf = _input->buffer(); - auto output_buf = _output->buffer(); - const auto in = *reinterpret_cast<const DataPtr *>(&input_buf); - auto out = *reinterpret_cast<DataPtr *>(&output_buf); - - switch (_input->data_type()) - { - case ir::DataType::FLOAT32: - castPtr(in.f, out); - return; - case ir::DataType::INT32: - castPtr(in.i32, out); - return; - case ir::DataType::UINT32: - castPtr(in.u32, out); - return; - case ir::DataType::UINT8: - castPtr(in.u8, out); - return; - case ir::DataType::BOOL8: - castPtr(in.b, out); - return; - case ir::DataType::INT64: - castPtr(in.i64, out); - return; - default: - throw std::runtime_error("Cast: unsupported data type" + - std::to_string((int)_input->data_type())); - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/CastLayer.h b/runtime/onert/backend/cpu/ops/CastLayer.h deleted file mode 100644 index 290c722e2..000000000 --- a/runtime/onert/backend/cpu/ops/CastLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class CastLayer : public ::onert::exec::IFunction
-{
-public:
-  CastLayer();
-
-public:
-  template <typename FromT, typename ToT> void castTensor(const FromT *in, ToT *out);
-  template <typename FromT> void castPtr(const FromT *in, DataPtr out);
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
-
-  void run() override;
-
-private:
-  const IPortableTensor *_input;
-  IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 2d5bbef1e..c057267d3 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -31,7 +31,8 @@ namespace ops
 ConvolutionLayer::ConvolutionLayer()
     : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
       _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
-      _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
+      _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+      _dilationHeightFactor(1), _activation(ir::Activation::NONE),
       _conv_kernel(new nnfw::cker::Conv()), _prepare(false)
 {
   // DO NOTHING
@@ -50,8 +51,8 @@ void ConvolutionLayer::convFloat32()
   op_params.padding_values.height = _paddingTop;
   op_params.stride_width = _strideWidth;
   op_params.stride_height = _strideHeight;
-  op_params.dilation_width_factor = 1;
-  op_params.dilation_height_factor = 1;
+  op_params.dilation_width_factor = _dilationWidthFactor;
+  op_params.dilation_height_factor = _dilationHeightFactor;
   op_params.float_activation_min = output_activation_min;
   op_params.float_activation_max = output_activation_max;
 
@@ -78,8 +79,8 @@ void ConvolutionLayer::convQuant8()
   nnfw::cker::ConvParams op_params;
   op_params.stride_width = _strideWidth;
   op_params.stride_height = _strideHeight;
-  op_params.dilation_width_factor = 1;
-  op_params.dilation_height_factor = 1;
+  op_params.dilation_width_factor = _dilationWidthFactor;
+  op_params.dilation_height_factor = _dilationHeightFactor;
   op_params.padding_type = getPaddingType(_paddingType);
   op_params.padding_values.width = _paddingLeft;
   op_params.padding_values.height = _paddingTop;
@@ -104,6 +105,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
                                  const uint32_t paddingLeft, const uint32_t paddingRight,
                                  const uint32_t paddingTop, const uint32_t paddingBottom,
                                  const uint32_t strideWidth, const uint32_t strideHeight,
+                                 const uint32_t dilationWidthFactor,
+                                 const uint32_t dilationHeightFactor,
                                  const ir::Activation activation, IPortableTensor *output)
 {
   _input = input;
@@ -116,6 +119,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
   _paddingBottom = paddingBottom;
   _strideWidth = strideWidth;
   _strideHeight = strideHeight;
+  _dilationWidthFactor = dilationWidthFactor;
+  _dilationHeightFactor = dilationHeightFactor;
   _activation = activation;
   _output = output;
 }
@@ -145,7 +150,8 @@ void ConvolutionLayer::run()
   param_padding.param.bottom = _paddingBottom;
 
   const auto padding =
-      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                           _dilationWidthFactor, _dilationHeightFactor);
 
   _paddingLeft = padding.left;
   _paddingRight = padding.right;
@@ -176,7 +182,8 @@ void ConvolutionLayer::prepare()
   {
     bool is_transposed = false;
     kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
-                   getPaddingType(_paddingType), is_transposed);
+                   getPaddingType(_paddingType), is_transposed, _dilationWidthFactor,
+                   _dilationHeightFactor);
 
     // Decrease reference of _kernel(weights) only when _kernel is constant
     if (is_transposed)
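With dilation in play, calculatePadding has to work from the kernel's effective footprint rather than its nominal size, which is why run() above now threads the dilation factors through. A minimal sketch of the usual SAME-padding arithmetic under the standard effective-kernel formula; the helper name is hypothetical, not onert's ir API.

#include <algorithm>
#include <cstdint>

// Total SAME padding along one axis for a dilated convolution (illustration only).
uint32_t samePaddingTotal(uint32_t in_size, uint32_t out_size, uint32_t stride,
                          uint32_t kernel_size, uint32_t dilation)
{
  // A dilated kernel touches a window of (kernel_size - 1) * dilation + 1 inputs
  const int64_t effective_kernel = static_cast<int64_t>(kernel_size - 1) * dilation + 1;
  const int64_t needed =
      (static_cast<int64_t>(out_size) - 1) * stride + effective_kernel - in_size;
  return static_cast<uint32_t>(std::max<int64_t>(needed, 0));
}
// e.g. a 3-wide kernel with dilation 2 pads like a 5-wide kernel: (3 - 1) * 2 + 1 = 5.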
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
index 2833387c4..398892e65 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
@@ -56,7 +56,8 @@ public:
                  const IPortableTensor *bias, ir::PaddingType _paddingType,
                  const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
                  const uint32_t paddingBottom, const uint32_t strideWidth,
-                 const uint32_t strideHeight, const ir::Activation activation,
+                 const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+                 const uint32_t dilationHeightFactor, const ir::Activation activation,
                  IPortableTensor *output);
 
   void run() override;
@@ -77,6 +78,8 @@ private:
 
   uint32_t _strideWidth;
   uint32_t _strideHeight;
+  uint32_t _dilationWidthFactor;
+  uint32_t _dilationHeightFactor;
 
   ir::Activation _activation;
 
diff --git a/runtime/onert/backend/cpu/ops/CosLayer.cc b/runtime/onert/backend/cpu/ops/CosLayer.cc
deleted file mode 100644
index 9417019d5..000000000
--- a/runtime/onert/backend/cpu/ops/CosLayer.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "CosLayer.h" -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -CosLayer::CosLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void CosLayer::cosFloat32() -{ - nnfw::cker::Cos(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void CosLayer::cosQuant8() { throw std::runtime_error{"NYI"}; } - -void CosLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void CosLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - cosFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - cosQuant8(); - } - else - { - throw std::runtime_error{"Cos: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/CosLayer.h b/runtime/onert/backend/cpu/ops/CosLayer.h deleted file mode 100644 index 1fadef718..000000000 --- a/runtime/onert/backend/cpu/ops/CosLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class CosLayer : public ::onert::exec::IFunction -{ -public: - CosLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void cosFloat32(); - void cosQuant8(); - - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/DivLayer.cc b/runtime/onert/backend/cpu/ops/DivLayer.cc deleted file mode 100644 index 556c55e33..000000000 --- a/runtime/onert/backend/cpu/ops/DivLayer.cc +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "DivLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void DivLayer::divFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool requires_broadcast = !HaveSameShapes(_lhs, _rhs); - if (requires_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - } - else - { - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - } -} - -void DivLayer::divQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - // op_params.quantized_activation_max = output_activation_max; - // op_params.quantized_activation_min = output_activation_min; - - // cker quant8 div is not implemented yet - throw std::runtime_error{"Div NYI for quantized"}; -} - -void DivLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void DivLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - divFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - divQuant8(); - } - else - { - throw std::runtime_error{"Div: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc new file mode 100644 index 000000000..c1d63172b --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ElementwiseActivationLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Logistic.h> +#include <cker/operation/ReLU.h> +#include <cker/operation/ReLU6.h> +#include <cker/operation/Tanh.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +ElementwiseActivationLayer::ElementwiseActivationLayer() + : _input(nullptr), _output(nullptr), _kernel() +{ + // DO NOTHING +} + +void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type) +{ + const auto input_scale = static_cast<double>(_input->data_scale()); + const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); + const auto output_scale = static_cast<double>(_output->data_scale()); + const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); + const float inverse_scale = 1 / output_scale; + int32_t maxval = std::numeric_limits<uint8_t>::max(); + int32_t minval = std::numeric_limits<uint8_t>::min(); + for (int32_t val = minval; val <= maxval; ++val) + { + const float dequantized = input_scale * (val - input_zero_point); + float transformed = 0.f; + if (op_type == ElementwiseActivationType::kTanh) + { + transformed = std::tanh(dequantized); + } + else if (op_type == ElementwiseActivationType::kLogistic) + { + transformed = 1.0f / (1.0f + std::exp(-dequantized)); + } + else + { + throw std::runtime_error("ElementwiseActivationLayer : unsupported activation type"); + } + const float rescaled = std::round(transformed * inverse_scale); + const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); + _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); + } +} + +void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input, + IPortableTensor *output) +{ + const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output)); + const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer()); + uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer()); + + for (int i = 0; i < size; ++i) + { + output_data[i] = _table[input_data[i]]; + } +} + +void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortableTensor *output, + float alpha, float beta, + ElementwiseActivationType op_type) +{ + _input = input; + _output = output; + + switch (op_type) + { + case ElementwiseActivationType::kLogistic: + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + PopulateLookupTable(op_type); + _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this, + std::placeholders::_1, std::placeholders::_2); + } + else if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::Logistic(getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"}; + } + break; + case ElementwiseActivationType::kReLU: + if (_input->data_type() == OperandType::FLOAT32) + { + if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ReLU(getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else if (alpha == 6.f && beta == 0.f) + { + _kernel = 
[](const IPortableTensor *input, IPortableTensor *output) {
+          nnfw::cker::ReLU6(getTensorShape(input),
+                            reinterpret_cast<const float *>(input->buffer()),
+                            reinterpret_cast<float *>(output->buffer()));
+        };
+      }
+      else
+      {
+        throw std::runtime_error(
+            "ElementwiseActivationLayer : This layer supports only ReLU(0-inf) and ReLU6(0-6)");
+      }
+    }
+    else
+    {
+      throw std::runtime_error{"ElementwiseActivationLayer(ReLU): unsupported data type"};
+    }
+    break;
+    case ElementwiseActivationType::kTanh:
+      if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        PopulateLookupTable(op_type);
+        _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+                            std::placeholders::_1, std::placeholders::_2);
+      }
+      else if (_input->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+          nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                           getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+        };
+      }
+      else
+      {
+        throw std::runtime_error{"ElementwiseActivationLayer(Tanh): unsupported data type"};
+      }
+      break;
+    default:
+      throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
+  }
+}
+
+void ElementwiseActivationLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
index 35a184074..3ef580041 100644
--- a/runtime/onert/backend/cpu/ops/TanhLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ElementwiseActivationLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ElementwiseActivationLAYER_H__
 
 #include <backend/IPortableTensor.h>
 
@@ -30,26 +30,33 @@ namespace cpu
 namespace ops
 {
 
-class TanhLayer : public ::onert::exec::IFunction
+enum class ElementwiseActivationType
 {
-public:
-  TanhLayer();
+  kLogistic,
+  kReLU,
+  kTanh
+};
 
+class ElementwiseActivationLayer : public ::onert::exec::IFunction
+{
 public:
-  void tanhFloat32();
+  ElementwiseActivationLayer();
 
-  void tanhQuant8();
-
-  void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+  void configure(const IPortableTensor *input, IPortableTensor *output, float alpha, float beta,
+                 const ElementwiseActivationType op_type);
 
   void run() override;
 
-  void PopulateLookupTable();
+  void PopulateLookupTable(const ElementwiseActivationType op_type);
+
+  void EvalUsingLookupTable(const IPortableTensor *input, IPortableTensor *output);
 
 private:
   const IPortableTensor *_input;
   IPortableTensor *_output;
   uint8_t _table[256];
+  std::function<void(const IPortableTensor *input, IPortableTensor *output)> _kernel;
 };
 
 } // namespace ops
@@ -57,4 +64,4 @@
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ElementwiseActivationLAYER_H__
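For the quantized Logistic/Tanh paths above, PopulateLookupTable exploits the fact that a uint8 input can take only 256 values: the float activation is evaluated once per possible input at configure time, and run() reduces to a table lookup per element. A minimal standalone sketch of that idea; the names are illustrative, not the onert API.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Build a 256-entry table mapping every possible uint8 input to its activated,
// re-quantized output (sketch of the PopulateLookupTable idea shown above).
void buildActivationTable(uint8_t table[256], float (*fn)(float), double in_scale,
                          int32_t in_zero_point, double out_scale, int32_t out_zero_point)
{
  for (int32_t val = 0; val <= 255; ++val)
  {
    const float dequantized = static_cast<float>(in_scale * (val - in_zero_point));
    const float transformed = fn(dequantized); // e.g. tanh or the logistic function
    const int32_t quantized =
        static_cast<int32_t>(std::round(transformed / out_scale)) + out_zero_point;
    table[val] = static_cast<uint8_t>(std::min(255, std::max(0, quantized)));
  }
}
// Usage: buildActivationTable(table, [](float x) { return std::tanh(x); }, ...);
// after which each element is just output[i] = table[input[i]].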
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
new file mode 100644
index 000000000..ea3c1e7cd
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseBinaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/LogicalOr.h>
+#include <cker/operation/MaxMin.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                      IPortableTensor *output)
+{
+  if (!HaveSameShapes(lhs, rhs))
+  {
+    nnfw::cker::LogicalOrBroadcast<T>(
+        getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+        reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+        reinterpret_cast<T *>(output->buffer()));
+  }
+  else
+  {
+    nnfw::cker::LogicalOrElementwise<T>(
+        getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+        reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+  }
+}
+
+template <typename T>
+void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                    IPortableTensor *output)
+{
+  nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+                     getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+                     getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                    IPortableTensor *output)
+{
+  nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+                     getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+                     getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+bool haveSameQuantInfo(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                       const IPortableTensor *output)
+{
+  return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) &&
+         (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset());
+}
+} // namespace
+
+void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+                                       IPortableTensor *output, const ElementwiseBinaryType op_type)
+{
+  assert(lhs != nullptr);
+  assert(rhs != nullptr);
+  assert(output != nullptr);
+
+  _lhs = lhs;
+  _rhs = rhs;
+  _output = output;
+
+  switch (op_type)
+  {
+    case ElementwiseBinaryType::kLogicalOr:
+      if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+      {
+        _kernel = logicalOrGeneric<bool>;
+      }
+      else
+      {
+        throw std::runtime_error{"LogicalOr: Unsupported data type"};
+      }
+      break;
+    case ElementwiseBinaryType::kMax:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        if (!haveSameQuantInfo(_lhs, _rhs, _output))
+        {
+          throw std::runtime_error("Max NYI for quantized");
+        }
+        _kernel = maximumGeneric<uint8_t>;
+      }
+      else if (_lhs->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = maximumGeneric<float>;
+      }
+      else
+      {
+        throw std::runtime_error{"Max: unsupported data type"};
+      }
+      break;
+    case ElementwiseBinaryType::kMin:
+      if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+      {
+        if (!haveSameQuantInfo(_lhs, _rhs, _output))
+        {
+          throw std::runtime_error("Min NYI for quantized");
+        }
+        _kernel = minimumGeneric<uint8_t>;
+      }
+      else if (_lhs->data_type() == OperandType::INT32)
+      {
+        _kernel = minimumGeneric<int32_t>;
+      }
+      else if (_lhs->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = minimumGeneric<float>;
+      }
+      else
+      {
+        throw std::runtime_error{"Min: unsupported data type"};
+      }
+      break;
+    default:
+      throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+  }
+}
+
+void ElementwiseBinaryLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
index ed8dc5b0f..052747a4c 100644
--- a/runtime/onert/backend/cpu/ops/MaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
 
 #include <backend/IPortableTensor.h>
 
@@ -30,20 +30,25 @@ namespace cpu
 namespace ops
 {
 
-class MaxLayer : public ::onert::exec::IFunction
+enum class ElementwiseBinaryType
+{
+  kLogicalAnd,
+  kLogicalOr,
+  kMax,
+  kMin,
+};
+
+class ElementwiseBinaryLayer : public ::onert::exec::IFunction
 {
 public:
-  MaxLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+  ElementwiseBinaryLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
   {
     // DO NOTHING
   }
 
 public:
-  template <typename T> void maximum();
-
-  void maxQuant8();
-
-  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
+  void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+                 const ElementwiseBinaryType op_type);
 
   void run() override;
 
@@ -51,6 +56,7 @@ private:
   const IPortableTensor *_lhs;
   const IPortableTensor *_rhs;
   IPortableTensor *_output;
+  std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
 };
 
 } // namespace ops
@@ -58,4 +64,4 @@
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
new file mode 100644
index 000000000..f8f89ab15
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "ElementwiseUnaryLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Elementwise.h> +#include <cker/operation/Erf.h> +#include <cker/operation/Exp.h> +#include <cker/operation/LogicalNot.h> +#include <cker/operation/Quantize.h> +#include <cker/operation/Round.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ +void absFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +template <typename FromT> +void castPtr(const FromT *in, DataPtr out, int num_elements, ir::DataType data_type_out) +{ + switch (data_type_out) + { + case ir::DataType::FLOAT32: + std::transform(in, in + num_elements, out.f, [](FromT a) { return static_cast<float>(a); }); + return; + case ir::DataType::INT32: + std::transform(in, in + num_elements, out.i32, + [](FromT a) { return static_cast<int32_t>(a); }); + return; + case ir::DataType::UINT32: + std::transform(in, in + num_elements, out.u32, + [](FromT a) { return static_cast<uint32_t>(a); }); + return; + case ir::DataType::UINT8: + std::transform(in, in + num_elements, out.u8, + [](FromT a) { return static_cast<uint8_t>(a); }); + return; + case ir::DataType::BOOL8: + std::transform(in, in + num_elements, out.b, [](FromT a) { return static_cast<bool>(a); }); + return; + case ir::DataType::INT64: + std::transform(in, in + num_elements, out.i64, + [](FromT a) { return static_cast<int64_t>(a); }); + return; + default: + throw std::runtime_error("Cast: Not supported output type" + + std::to_string((int)data_type_out)); + } +} + +void cast(const IPortableTensor *input, IPortableTensor *output) +{ + auto input_buf = input->buffer(); + auto output_buf = output->buffer(); + const auto in = *reinterpret_cast<const DataPtr *>(&input_buf); + auto out = *reinterpret_cast<DataPtr *>(&output_buf); + + auto input_shape = getTensorShape(input); + auto output_shape = getTensorShape(output); + const auto num_elements = MatchingFlatSize(input_shape, output_shape); + + switch (input->data_type()) + { + case ir::DataType::FLOAT32: + castPtr(in.f, out, num_elements, output->data_type()); + return; + case ir::DataType::INT32: + castPtr(in.i32, out, num_elements, output->data_type()); + return; + case ir::DataType::UINT32: + castPtr(in.u32, out, num_elements, output->data_type()); + return; + case ir::DataType::UINT8: + castPtr(in.u8, out, num_elements, output->data_type()); + return; + case ir::DataType::BOOL8: + castPtr(in.b, out, num_elements, output->data_type()); + return; + case ir::DataType::INT64: + castPtr(in.i64, out, num_elements, output->data_type()); + return; + default: + throw std::runtime_error("Cast: unsupported data type" + + std::to_string((int)input->data_type())); + } +} + +void cosFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void expFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void erfFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + 
getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logicalNot(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()),
+                         getTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+}
+
+void negFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Neg(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename InputT, typename OutputT>
+void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()),
+                       getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()),
+                       output->data_scale(), output->data_offset());
+}
+
+void roundFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                    getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                    getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+                  getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+  if (!HaveSameShapes(input, output))
+    throw std::runtime_error{"ZerosLike: input and output shape don't match."};
+
+  auto element_size = getTensorShape(input).FlatSize();
+
+  memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T));
+}
+} // namespace
+
+void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+                                      const ElementwiseUnaryType op_type)
+{
+  assert(input != nullptr);
+  assert(output != nullptr);
+
+  _input = input;
+  _output = output;
+
+  switch (op_type)
+  {
+    case ElementwiseUnaryType::kAbs:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = absFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Abs: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kCast:
+      _kernel = cast;
+      break;
+    case ElementwiseUnaryType::kCos:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = cosFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Cos: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kExp:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = expFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Exp: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kErf:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = erfFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Erf: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kLog:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = logFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Log: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kLogicalNot:
+      if ((input->data_type() == OperandType::BOOL8))
+      {
+        _kernel = logicalNot;
+      }
+      else
+      {
+        throw std::runtime_error{"LogicalNot: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kNeg:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = negFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Neg: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kQuantize:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = affineQuantize<float, uint8_t>;
+      }
+      else
+      {
+        throw std::runtime_error{"Quantize: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kRound:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = roundFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Round: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kRSqrt:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = rsqrtFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"RSqrt: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kSin:
+      if ((input->data_type() == OperandType::FLOAT32))
+      {
+        _kernel = sinFloat32;
+      }
+      else
+      {
+        throw std::runtime_error{"Sin: Unsupported data type"};
+      }
+      break;
+    case ElementwiseUnaryType::kZerosLike:
+      if (input->data_type() == OperandType::FLOAT32)
+      {
+        _kernel = zerosLikeFloat32<float>;
+      }
+      else if (input->data_type() == OperandType::INT32)
+      {
+        _kernel = zerosLikeFloat32<int32_t>;
+      }
+      else
+      {
+        throw std::runtime_error{"ZerosLike: Unsupported data type"};
+      }
+      break;
+    default:
+      throw std::runtime_error{"ElementwiseUnary: Unsupported ElementwiseUnary type"};
+  }
+}
+
+void ElementwiseUnaryLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
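ElementwiseUnaryLayer, like ElementwiseBinaryLayer and BinaryArithmeticLayer above, replaces the per-op run()-time switches of the deleted layers with configure-time kernel selection: the op-type/data-type combination is resolved to a std::function once, and run() is a single indirect call. A reduced sketch of the pattern, using hypothetical stand-in types rather than the real IPortableTensor/IFunction interfaces:

#include <functional>
#include <stdexcept>
#include <utility>

struct Tensor; // stand-in for IPortableTensor

// Core of the configure-time dispatch: the switch over op and data type happens
// once, outside the hot path; run() only forwards to the chosen kernel.
class UnaryLayer
{
public:
  void configure(const Tensor *input, Tensor *output,
                 std::function<void(const Tensor *, Tensor *)> kernel)
  {
    if (!kernel)
      throw std::runtime_error{"UnaryLayer: no kernel for this op/type combination"};
    _input = input;
    _output = output;
    _kernel = std::move(kernel);
  }

  void run() { _kernel(_input, _output); }

private:
  const Tensor *_input = nullptr;
  Tensor *_output = nullptr;
  std::function<void(const Tensor *, Tensor *)> _kernel;
};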
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
index 994d17a30..74968386d 100644
--- a/runtime/onert/backend/cpu/ops/ReLU6Layer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
@@ -14,8 +14,8 @@
  * limitations under the License.
*/ -#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ #include <backend/IPortableTensor.h> @@ -30,23 +30,41 @@ namespace cpu namespace ops { -class ReLU6Layer : public ::onert::exec::IFunction +enum class ElementwiseUnaryType { -public: - ReLU6Layer(); + kAbs, + kCast, + kCos, + kErf, + kExp, + kLog, + kLogicalNot, + kNeg, + kQuantize, + kRound, + kRSqrt, + kSin, + kZerosLike +}; +class ElementwiseUnaryLayer : public ::onert::exec::IFunction +{ public: - void relu6Float32(); + ElementwiseUnaryLayer() : _input(nullptr), _output(nullptr), _kernel() + { + // DO NOTHING + } - void relu6Quant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); +public: + void configure(const IPortableTensor *input, IPortableTensor *output, + const ElementwiseUnaryType op_type); void run() override; private: const IPortableTensor *_input; IPortableTensor *_output; + std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -54,4 +72,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.cc b/runtime/onert/backend/cpu/ops/ExpLayer.cc deleted file mode 100644 index 4dbec9cd5..000000000 --- a/runtime/onert/backend/cpu/ops/ExpLayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ExpLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Exp.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ExpLayer::ExpLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ExpLayer::expFloat32() -{ - nnfw::cker::Exp(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void ExpLayer::expQuant8() -{ - // cker quant8 exp is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ExpLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ExpLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - expFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - expQuant8(); - } - else - { - throw std::runtime_error{"Exp: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.h b/runtime/onert/backend/cpu/ops/ExpLayer.h deleted file mode 100644 index cd27b0e40..000000000 --- a/runtime/onert/backend/cpu/ops/ExpLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class ExpLayer : public ::onert::exec::IFunction -{ -public: - ExpLayer(); - -public: - void expFloat32(); - - void expQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogLayer.cc b/runtime/onert/backend/cpu/ops/LogLayer.cc deleted file mode 100644 index 307c15bc4..000000000 --- a/runtime/onert/backend/cpu/ops/LogLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "LogLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogLayer::LogLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogLayer::logFloat32() -{ - nnfw::cker::Log(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void LogLayer::logQuant8() { throw std::runtime_error{"NYI"}; } - -void LogLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void LogLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - logFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - logQuant8(); - } - else - { - throw std::runtime_error{"Log: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogLayer.h b/runtime/onert/backend/cpu/ops/LogLayer.h deleted file mode 100644 index 2f6b4b570..000000000 --- a/runtime/onert/backend/cpu/ops/LogLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogLayer : public ::onert::exec::IFunction -{ -public: - LogLayer(); - -public: - void logFloat32(); - - void logQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc index 06dde4fc4..1d7ee6caa 100644 --- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc @@ -34,6 +34,16 @@ LogSoftMaxLayer::LogSoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0. // DO NOTHING } +void LogSoftMaxLayer::PopulateLookupTable(const float kBeta) +{ + const float scale = -_input->data_scale() * kBeta; + const int32_t max_uint8 = std::numeric_limits<uint8_t>::max(); + for (int32_t val = 0; val <= max_uint8; ++val) + { + _table[max_uint8 - val] = expf(scale * val); + } +} + void LogSoftMaxLayer::logsoftmaxFloat32() { nnfw::cker::SoftmaxParams op_params; @@ -46,7 +56,15 @@ void LogSoftMaxLayer::logsoftmaxFloat32() void LogSoftMaxLayer::logsoftmaxQuant8() { - // NYI + nnfw::cker::SoftmaxParams op_params; + op_params.beta = _beta; + op_params.axis = _axis; + op_params.table = _table; + op_params.zero_point = _output->data_offset(); + op_params.scale = _output->data_scale(); + nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input), + reinterpret_cast<const uint8_t *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); } void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis, @@ -56,6 +74,10 @@ void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, _output = output; _beta = beta; _axis = axis; + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + PopulateLookupTable(_beta); + } } void LogSoftMaxLayer::run() { @@ -66,7 +88,7 @@ logsoftmaxFloat32(); } else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - throw std::runtime_error{"LogSoftmax : NYI"}; + logsoftmaxQuant8(); } else { diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h index ba9deca17..1533f3361 100644 --- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h +++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h @@ -45,12 +45,15 @@ public: void run(); + void PopulateLookupTable(const float kBeta); + private: const IPortableTensor *_input; IPortableTensor *_output; float _beta; int
_axis; + float _table[256]; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc b/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc deleted file mode 100644 index f2192c148..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "LogicalNotLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/LogicalNot.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogicalNotLayer::LogicalNotLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogicalNotLayer::logicalNotBool8() -{ - nnfw::cker::LogicalNot(getTensorShape(_input), reinterpret_cast<const bool *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<bool *>(_output->buffer())); -} - -void LogicalNotLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void LogicalNotLayer::run() -{ - if (_input->data_type() == OperandType::BOOL8) - { - logicalNotBool8(); - } - else - { - throw std::runtime_error{"LogicalNot: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h b/runtime/onert/backend/cpu/ops/LogicalNotLayer.h deleted file mode 100644 index 5543cca3d..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
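On the LogSoftMaxLayer change above: the quantized path becomes cheap because, once the row maximum is subtracted, an input byte can only sit 0..255 steps below it, so exp(-input_scale * beta * distance) takes at most 256 distinct values, and PopulateLookupTable precomputes all of them into the new _table member. For reference, a sketch of the float identity the table encodes (standard log-softmax; this is not the cker kernel itself):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Reference log-softmax over one row, showing the max-subtraction trick:
// log_softmax(x_i) = beta * (x_i - max) - log(sum_j exp(beta * (x_j - max))).
// In the quantized kernel, each exp(beta * (x_j - max)) term is a table lookup.
std::vector<float> logSoftmax(const std::vector<float> &x, float beta)
{
  const float max = *std::max_element(x.begin(), x.end());

  float sum = 0.0f;
  for (float v : x)
    sum += std::exp(beta * (v - max));

  const float log_sum = std::log(sum);
  std::vector<float> out(x.size());
  for (std::size_t i = 0; i < x.size(); ++i)
    out[i] = beta * (x[i] - max) - log_sum;
  return out;
}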
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogicalNotLayer : public ::onert::exec::IFunction -{ -public: - LogicalNotLayer(); - -public: - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void logicalNotBool8(); - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc b/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc deleted file mode 100644 index 5b7c9f6f0..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "LogicalOrLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/LogicalOr.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -void LogicalOrLayer::lorBool8() -{ - if (!HaveSameShapes(_lhs, _rhs)) - { - nnfw::cker::LogicalOrBroadcast<bool>( - getTensorShape(_lhs), reinterpret_cast<const bool *>(_lhs->buffer()), getTensorShape(_rhs), - reinterpret_cast<const bool *>(_rhs->buffer()), getTensorShape(_output), - reinterpret_cast<bool *>(_output->buffer())); - } - else - { - nnfw::cker::LogicalOrElementwise<bool>(getTensorShape(_lhs), - reinterpret_cast<const bool *>(_lhs->buffer()), - reinterpret_cast<const bool *>(_rhs->buffer()), - reinterpret_cast<bool *>(_output->buffer())); - } -} - -void LogicalOrLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void LogicalOrLayer::run() -{ - if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) - { - lorBool8(); - } - else - { - throw std::runtime_error{"LogicalOr: Unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h b/runtime/onert/backend/cpu/ops/LogicalOrLayer.h deleted file mode 100644 index efaf396e8..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class LogicalOrLayer : public ::onert::exec::IFunction -{ -public: - LogicalOrLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // Nothing - } - -public: - void configure(const IPortableTensor *_lhs, const IPortableTensor *_rhs, IPortableTensor *output); - - void run() override; - -private: - void lorBool8(); - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.cc b/runtime/onert/backend/cpu/ops/LogisticLayer.cc deleted file mode 100644 index 140ab4d2c..000000000 --- a/runtime/onert/backend/cpu/ops/LogisticLayer.cc +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#include "LogisticLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Logistic.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogisticLayer::LogisticLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogisticLayer::populateLookupTable() -{ - const auto input_scale = static_cast<double>(_input->data_scale()); - const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); - const auto output_scale = static_cast<double>(_output->data_scale()); - const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); - const float inverse_scale = 1 / output_scale; - int32_t maxval = std::numeric_limits<uint8_t>::max(); - int32_t minval = std::numeric_limits<uint8_t>::min(); - for (int32_t val = minval; val <= maxval; ++val) - { - const float dequantized = input_scale * (val - input_zero_point); - const float transformed = 1.0f / (1.0f + std::exp(-dequantized)); - const float rescaled = std::round(transformed * inverse_scale); - const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); - _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); - } -} - -void LogisticLayer::logisticFloat32() -{ - nnfw::cker::Logistic(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void LogisticLayer::logisticQuant8() -{ - const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output)); - const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer()); - uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer()); - - for (int i = 0; i < size; ++i) - { - output_data[i] = _table[input_data[i]]; - } -} - -void LogisticLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; - - if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - if (_output->data_scale() != 1.f / 256) - { - throw std::runtime_error{"incorrect scale for output"}; - } - populateLookupTable(); - } -} - -void LogisticLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - logisticFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - logisticQuant8(); - } - else - { - throw std::runtime_error{"Logistic: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.h b/runtime/onert/backend/cpu/ops/LogisticLayer.h deleted file mode 100644 index cac77939d..000000000 --- a/runtime/onert/backend/cpu/ops/LogisticLayer.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
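The deleted LogisticLayer above shows the same lookup-table idea for sigmoid: configure() walks all 256 possible input bytes, dequantizes each one, applies 1/(1 + exp(-x)), and requantizes against the fixed output scale of 1/256, so run() on uint8 data is a pure table lookup per element. A worked example with assumed quantization parameters input_scale = 0.1 and input_zero_point = 128: input byte 148 dequantizes to 0.1 * (148 - 128) = 2.0, sigmoid(2.0) ≈ 0.8808, and round(0.8808 * 256) = 225, so _table[148] = 225. The guard rejecting _output->data_scale() != 1.f / 256 exists precisely because the table bakes that output scale in.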
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogisticLayer : public ::onert::exec::IFunction -{ -public: - LogisticLayer(); - -public: - void logisticFloat32(); - - void logisticQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - void populateLookupTable(); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; - - uint8_t _table[256]; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.cc b/runtime/onert/backend/cpu/ops/MaxLayer.cc deleted file mode 100644 index 9631983be..000000000 --- a/runtime/onert/backend/cpu/ops/MaxLayer.cc +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MaxLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/MaxMin.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -template <typename T> void MaxLayer::maximum() -{ - nnfw::cker::Max<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<T *>(_output->buffer())); -} - -void MaxLayer::maxQuant8() -{ - if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale()) - { - if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset()) - { - return nnfw::cker::Max<uint8_t>( - getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - } - } - throw std::runtime_error("Max NYI for quantized"); -} - -void MaxLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void MaxLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - maximum<float>(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - maxQuant8(); - } - else - { - throw std::runtime_error{"Max: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc b/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc deleted file mode 100644 index 1e983b408..000000000 --- a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2018 
Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MaxPoolLayer.h" - -#include <cker/operation/MaxPool.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -#define MAXPOOLING_PARAMETERS \ - nnfw::cker::PoolParams op_params; \ - op_params.stride_height = _strideHeight; \ - op_params.stride_width = _strideWidth; \ - op_params.filter_height = _kernelHeight; \ - op_params.filter_width = _kernelWidth; \ - op_params.padding_values.height = (int8_t)_paddingTop; \ - op_params.padding_values.width = (int8_t)_paddingLeft; - -MaxPoolLayer::MaxPoolLayer() - : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0), - _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0), - _activation(ir::Activation::NONE) -{ - // DO NOTHING -} - -void MaxPoolLayer::maxPoolFloat32() -{ - MAXPOOLING_PARAMETERS - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - nnfw::cker::MaxPool(op_params, getTensorShape(_input), - reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output), - reinterpret_cast<float *>(_output->buffer())); -} -void MaxPoolLayer::maxPoolQuant8() -{ - MAXPOOLING_PARAMETERS - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - nnfw::cker::MaxPool(op_params, getTensorShape(_input), - reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_output), - reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void MaxPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output) -{ - _input = input; - _paddingLeft = paddingLeft; - _paddingRight = paddingRight; - _paddingTop = paddingTop; - _paddingBottom = paddingBottom; - _strideWidth = strideWidth; - _strideHeight = strideHeight; - _kernelWidth = kernelWidth; - _kernelHeight = kernelHeight; - _activation = activation; - _output = output; -} - -void MaxPoolLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - maxPoolFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - maxPoolQuant8(); - } - else - { - throw std::runtime_error{"MaxPool: unsupported data type"}; - } -} - -#undef MAXPOOLING_PARAMETERS - -} // namespace ops -} // namespace cpu -} // namespace backend -} // 
namespace onert diff --git a/runtime/onert/backend/cpu/ops/MinLayer.cc b/runtime/onert/backend/cpu/ops/MinLayer.cc deleted file mode 100644 index 20859673b..000000000 --- a/runtime/onert/backend/cpu/ops/MinLayer.cc +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MinLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/MaxMin.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -template <typename T> void MinLayer::minimum() -{ - nnfw::cker::Min<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<T *>(_output->buffer())); -} - -void MinLayer::minQuant8() -{ - if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale()) - { - if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset()) - { - return nnfw::cker::Min<uint8_t>( - getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - } - } - throw std::runtime_error("Min NYI for quantized"); -} - -void MinLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void MinLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - minimum<float>(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - minQuant8(); - } - else if (_lhs->data_type() == OperandType::INT32) - { - minimum<int32_t>(); - } - else - { - throw std::runtime_error{"Min: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MinLayer.h b/runtime/onert/backend/cpu/ops/MinLayer.h deleted file mode 100644 index 9bd114e54..000000000 --- a/runtime/onert/backend/cpu/ops/MinLayer.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class MinLayer : public ::onert::exec::IFunction -{ -public: - MinLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - template <typename T> void minimum(); - - void minQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/MulLayer.cc b/runtime/onert/backend/cpu/ops/MulLayer.cc deleted file mode 100644 index eef73edf3..000000000 --- a/runtime/onert/backend/cpu/ops/MulLayer.cc +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MulLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void MulLayer::mulFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void MulLayer::mulQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - - double 
real_multiplier = _lhs->data_scale() * _rhs->data_scale() / _output->data_scale(); - QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift); - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void MulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void MulLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - mulFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - mulQuant8(); - } - else - { - throw std::runtime_error{"Mul: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MulLayer.h b/runtime/onert/backend/cpu/ops/MulLayer.h deleted file mode 100644 index 2c4a98875..000000000 --- a/runtime/onert/backend/cpu/ops/MulLayer.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
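A note on the real_multiplier line in the deleted mulQuant8() above: with affine quantization each real value is r = scale * (q - zero_point), so the product of two quantized operands carries scale s_lhs * s_rhs, and mapping it onto the output grid requires multiplying by s_lhs * s_rhs / s_out. Concretely, s_out * (q_out - z_out) = s_lhs * (q_lhs - z_lhs) * s_rhs * (q_rhs - z_rhs) rearranges to q_out = z_out + (s_lhs * s_rhs / s_out) * (q_lhs - z_lhs) * (q_rhs - z_rhs), which is why the zero points are negated into input1_offset/input2_offset and why QuantizeMultiplier folds the real multiplier into an int32 fixed-point multiplier plus a shift for integer-only arithmetic.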
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class MulLayer : public ::onert::exec::IFunction -{ -public: - MulLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void mulFloat32(); - - void mulQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/NegLayer.cc b/runtime/onert/backend/cpu/ops/NegLayer.cc deleted file mode 100644 index 2cb95b771..000000000 --- a/runtime/onert/backend/cpu/ops/NegLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "NegLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -NegLayer::NegLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void NegLayer::negFloat32() -{ - nnfw::cker::Neg(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void NegLayer::negQuant8() { throw std::runtime_error{"NYI"}; } - -void NegLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void NegLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - negFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - negQuant8(); - } - else - { - throw std::runtime_error{"Neg: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/NegLayer.h b/runtime/onert/backend/cpu/ops/NegLayer.h deleted file mode 100644 index addf84ec2..000000000 --- a/runtime/onert/backend/cpu/ops/NegLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class NegLayer : public ::onert::exec::IFunction -{ -public: - NegLayer(); - -public: - void negFloat32(); - - void negQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/PoolLayer.cc b/runtime/onert/backend/cpu/ops/PoolLayer.cc new file mode 100644 index 000000000..85d02a751 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/PoolLayer.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "PoolLayer.h" + +#include <cker/operation/AveragePool.h> +#include <cker/operation/MaxPool.h> + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ +template <typename T> +void avgPool2D(const nnfw::cker::PoolParams ¶ms, const IPortableTensor *input, + IPortableTensor *output) +{ + nnfw::cker::AveragePool<T>(params, getTensorShape(input), + reinterpret_cast<const T *>(input->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); +} + +template <typename T> +void maxPool2D(const nnfw::cker::PoolParams ¶ms, const IPortableTensor *input, + IPortableTensor *output) +{ + nnfw::cker::MaxPool<T>(params, getTensorShape(input), + reinterpret_cast<const T *>(input->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); +} + +template <typename T> +std::function<void(const IPortableTensor *, IPortableTensor *)> +generateKernelGeneric(const nnfw::cker::PoolParams ¶ms, PoolType op_type) +{ + if (op_type == PoolType::kAvg) + { + return std::bind(&avgPool2D<T>, params, std::placeholders::_1, std::placeholders::_2); + } + else if (op_type == PoolType::kMax) + { + return std::bind(&maxPool2D<T>, params, std::placeholders::_1, std::placeholders::_2); + } + else + { + throw std::runtime_error{"Pool: unsupported pool type"}; + } +} +} // namespace + +PoolLayer::PoolLayer() : _input(nullptr), _output(nullptr), _kernel() +{ + // DO NOTHING +} + +#define POOLING_PARAMETERS \ + nnfw::cker::PoolParams op_params; \ + op_params.stride_height = strideHeight; \ + op_params.stride_width = strideWidth; \ + op_params.filter_height = kernelHeight; \ + op_params.filter_width = kernelWidth; \ + op_params.padding_values.height = (int8_t)paddingTop; \ + op_params.padding_values.width = (int8_t)paddingLeft; + +void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t, + const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const ir::Activation activation, + IPortableTensor *output, const PoolType op_type) +{ + assert(input != nullptr); + assert(output != nullptr); + + _input = input; + _output = output; + + POOLING_PARAMETERS + if (_input->data_type() == OperandType::FLOAT32) + { + float output_activation_min = 0; + float output_activation_max = 0; + CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max); + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + _kernel = generateKernelGeneric<float>(op_params, op_type); + } + else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(activation, _output, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + _kernel = generateKernelGeneric<uint8_t>(op_params, op_type); + } + else + { + throw std::runtime_error{"Pool: unsupported data type"}; + } +} + +void PoolLayer::run() { _kernel(_input, _output); } + +#undef AVGPOOLING_PARAMETERS + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h b/runtime/onert/backend/cpu/ops/PoolLayer.h index 4c5109f64..b37835946 100644 --- 
a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h +++ b/runtime/onert/backend/cpu/ops/PoolLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ #include <backend/IPortableTensor.h> #include "OperationUtils.h" @@ -31,22 +31,25 @@ namespace cpu namespace ops { -class MaxPoolLayer : public ::onert::exec::IFunction +enum class PoolType { -public: - MaxPoolLayer(); + kAvg, + kL2, + kMax, +}; +class PoolLayer : public ::onert::exec::IFunction +{ public: - void maxPoolFloat32(); - - void maxPoolQuant8(); + PoolLayer(); +public: void configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t kernelWidth, const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output); + IPortableTensor *output, const PoolType op_type); void run() override; @@ -54,17 +57,7 @@ private: const IPortableTensor *_input; IPortableTensor *_output; - uint32_t _paddingLeft; - uint32_t _paddingTop; - uint32_t _paddingRight; - uint32_t _paddingBottom; - - uint32_t _strideWidth; - uint32_t _strideHeight; - uint32_t _kernelWidth; - uint32_t _kernelHeight; - - ir::Activation _activation; + std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -72,4 +65,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc deleted file mode 100644 index 45fc148bf..000000000 --- a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
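The MaxPoolLayer → PoolLayer rename above completes the consolidation seen in PoolLayer.cc: configure() curries the pooling parameters into a std::function once, so run() needs no switch and the header drops all the padding/stride/kernel members. A self-contained sketch of that configure-time currying, using a simplified 1-D pool in place of the real nnfw::cker kernels (PoolParams, Tensor, and the function names here are stand-ins):

#include <algorithm>
#include <cstddef>
#include <functional>
#include <stdexcept>
#include <vector>

// Simplified stand-ins for the sketch (not the onert/cker types).
struct PoolParams
{
  int window;
  int stride;
};
using Tensor = std::vector<float>;
using Kernel = std::function<void(const Tensor *, Tensor *)>;

// 1-D average pooling: one output per fully covered window.
void avgPool(const PoolParams &p, const Tensor *in, Tensor *out)
{
  out->clear();
  for (std::size_t i = 0; i + p.window <= in->size(); i += p.stride)
  {
    float sum = 0.0f;
    for (int k = 0; k < p.window; ++k)
      sum += (*in)[i + k];
    out->push_back(sum / p.window);
  }
}

// 1-D max pooling over the same windows.
void maxPool(const PoolParams &p, const Tensor *in, Tensor *out)
{
  out->clear();
  for (std::size_t i = 0; i + p.window <= in->size(); i += p.stride)
  {
    float best = (*in)[i];
    for (int k = 1; k < p.window; ++k)
      best = std::max(best, (*in)[i + k]);
    out->push_back(best);
  }
}

enum class PoolKind
{
  kAvg,
  kMax
};

// configure()-time currying: bind the parameters now so the stored callable
// only needs (input, output), matching the layer's generic _kernel signature.
Kernel makePoolKernel(const PoolParams &p, PoolKind kind)
{
  if (kind == PoolKind::kAvg)
    return std::bind(&avgPool, p, std::placeholders::_1, std::placeholders::_2);
  if (kind == PoolKind::kMax)
    return std::bind(&maxPool, p, std::placeholders::_1, std::placeholders::_2);
  throw std::runtime_error{"Pool: unsupported pool type"};
}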
- */ - -#include "QuantizeLayer.h" - -#include <cker/operation/Quantize.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize() -{ - nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()), - _output->data_scale(), _output->data_offset()); -} - -void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void QuantizeLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - affineQuantize<float, uint8_t>(); - } - else - { - throw std::runtime_error{"Quantize: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h deleted file mode 100644 index b4e7aca40..000000000 --- a/runtime/onert/backend/cpu/ops/QuantizeLayer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class QuantizeLayer : public ::onert::exec::IFunction -{ -public: - QuantizeLayer(); - -public: - template <typename InputT, typename OutputT> void affineQuantize(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.cc b/runtime/onert/backend/cpu/ops/RankLayer.cc index 185d7554e..4690bdf72 100644 --- a/runtime/onert/backend/cpu/ops/RoundLayer.cc +++ b/runtime/onert/backend/cpu/ops/RankLayer.cc @@ -14,12 +14,10 @@ * limitations under the License. 
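The deleted QuantizeLayer above is the code that moved into ElementwiseUnaryLayer's affineQuantize template earlier in this diff; the underlying transform is essentially q = clamp(round(x / output_scale) + output_zero_point) to the output type's range. A worked example with assumed parameters output_scale = 0.5 and output_zero_point = 10: x = 2.3 maps to round(2.3 / 0.5) + 10 = 5 + 10 = 15, while x = -7.0 maps to round(-14.0) + 10 = -4, which clamps to 0 for uint8.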
*/ -#include "RoundLayer.h" +#include "RankLayer.h" #include "OperationUtils.h" -#include <cker/operation/Round.h> - namespace onert { namespace backend @@ -28,32 +26,28 @@ namespace cpu { namespace ops { -RoundLayer::RoundLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} -void RoundLayer::roundFloat32() +RankLayer::RankLayer() : _input(nullptr), _output(nullptr) { - nnfw::cker::Round(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); + // DO NOTHING } -void RoundLayer::configure(const IPortableTensor *input, IPortableTensor *output) +void RankLayer::configure(const IPortableTensor *input, IPortableTensor *output) { _input = input; _output = output; } -void RoundLayer::run() +void RankLayer::run() { - if (_input->data_type() == OperandType::FLOAT32) + if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32) { - roundFloat32(); + int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer()); + output_data[0] = _input->num_dimensions(); } else { - throw std::runtime_error{"Round: unsupported data type"}; + throw std::runtime_error{"Rank : unsupported data type"}; } } diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h b/runtime/onert/backend/cpu/ops/RankLayer.h index 054894203..6282ceb07 100644 --- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h +++ b/runtime/onert/backend/cpu/ops/RankLayer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ #include <backend/IPortableTensor.h> @@ -29,11 +29,13 @@ namespace cpu { namespace ops { -class ZerosLikeLayer : public ::onert::exec::IFunction + +class RankLayer : public ::onert::exec::IFunction { public: - ZerosLikeLayer(); + RankLayer(); +public: void configure(const IPortableTensor *input, IPortableTensor *output); void run() override; @@ -48,4 +50,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc deleted file mode 100644 index 26eb35e0d..000000000 --- a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ReLU6Layer.h" - -#include "OperationUtils.h" - -#include <cker/operation/ReLU6.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ReLU6Layer::relu6Float32() -{ - nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - reinterpret_cast<float *>(_output->buffer())); -} - -void ReLU6Layer::relu6Quant8() -{ - // cker quant8 relu is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ReLU6Layer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - relu6Float32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - relu6Quant8(); - } - else - { - throw std::runtime_error{"ReLU6: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.cc b/runtime/onert/backend/cpu/ops/ReLULayer.cc deleted file mode 100644 index cb4529feb..000000000 --- a/runtime/onert/backend/cpu/ops/ReLULayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ReLULayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/ReLU.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ReLULayer::ReLULayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ReLULayer::reluFloat32() -{ - nnfw::cker::ReLU(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void ReLULayer::reluQuant8() -{ - // cker quant8 relu is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ReLULayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ReLULayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - reluFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - reluQuant8(); - } - else - { - throw std::runtime_error{"ReLU: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.h b/runtime/onert/backend/cpu/ops/ReLULayer.h deleted file mode 100644 index 4ba2be772..000000000 --- a/runtime/onert/backend/cpu/ops/ReLULayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class ReLULayer : public ::onert::exec::IFunction -{ -public: - ReLULayer(); - -public: - void reluFloat32(); - - void reluQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc index fe22dbed7..bb5f85d60 100644 --- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc +++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc @@ -49,27 +49,31 @@ void evalLogic(const IPortableTensor *input, IPortableTensor *output, const std: } template <typename T> -void evalType(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes, - bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +evalType(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (reduce_type) { case ReduceType::kSum: - return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(0), reduce_kernel, - [](const T current, const T in) -> T { return in + current; }); + return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, static_cast<T>(0), reduce_kernel, + [](const T current, const T in) -> T { return in + current; }); break; case ReduceType::kProd: - return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(1), reduce_kernel, - [](const T current, const T in) -> T { return in * current; }); + return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, static_cast<T>(1), reduce_kernel, + [](const T current, const T in) -> T { return in * current; }); break; case ReduceType::kMax: - return evalLogic<T>( - input, output, axes, keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel, + return std::bind( + &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel, [](const T current, const T in) -> T { return (in > current) ? in : current; }); break; case ReduceType::kMin: - return evalLogic<T>( - input, output, axes, keep_dims, std::numeric_limits<T>::max(), reduce_kernel, + return std::bind( + &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + keep_dims, std::numeric_limits<T>::max(), reduce_kernel, [](const T current, const T in) -> T { return (in < current) ? 
in : current; }); break; default: @@ -79,44 +83,44 @@ void evalType(const IPortableTensor *input, IPortableTensor *output, const std:: // Template specialization for bool type template <> -void evalType<bool>(const IPortableTensor *input, IPortableTensor *output, - const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel, - ReduceType reduce_type) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +evalType<bool>(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (reduce_type) { case ReduceType::kAny: - return evalLogic<bool>( - input, output, axes, keep_dims, false, reduce_kernel, - [](const bool current, const bool in) -> bool { return in || current; }); + return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, false, reduce_kernel, + [](const bool current, const bool in) -> bool { return in || current; }); break; case ReduceType::kAll: - return evalLogic<bool>( - input, output, axes, keep_dims, true, reduce_kernel, - [](const bool current, const bool in) -> bool { return in && current; }); + return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, true, reduce_kernel, + [](const bool current, const bool in) -> bool { return in && current; }); break; default: throw std::runtime_error{"Reduce: Unsupported reduce type"}; } } -template <ReduceType reduce_type> -void evalGeneric(const IPortableTensor *input, IPortableTensor *output, - const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +generateKernelGeneric(const IPortableTensor *input, bool keep_dims, + nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (input->data_type()) { case OperandType::FLOAT32: - return evalType<float>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<float>(keep_dims, reduce_kernel, reduce_type); case OperandType::INT32: - return evalType<int32_t>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<int32_t>(keep_dims, reduce_kernel, reduce_type); case OperandType::BOOL8: - return evalType<bool>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<bool>(keep_dims, reduce_kernel, reduce_type); default: throw std::runtime_error{"Reduce(generic): unsupported data type"}; } } +// TODO Refine this function void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel) @@ -146,14 +150,15 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output, return; } - evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel); + const auto kernel = generateKernelGeneric(input, keep_dims, reduce_kernel, ReduceType::kSum); + kernel(input, output, axes); } } // namespace ReduceLayer::ReduceLayer() - : _input(nullptr), _axes(nullptr), _output(nullptr), _reduceType(ReduceType::kAny), - _keep_dims(false), _reduce_kernel(new nnfw::cker::Reduce()) + : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()), + _kernel() { // DO NOTHING } @@ -166,43 +171,44 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor _input = input; _axes = axes; _output = output; - _reduceType = reduceType; - _keep_dims = keep_dims; -} -void 
ReduceLayer::run() -{ - const auto axes = getReducerAxes(_axes); - switch (_reduceType) + switch (reduceType) { case ReduceType::kSum: if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = std::bind(&evalSumQuantized, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, *_reduce_kernel); return; } - evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kSum); break; case ReduceType::kProd: - evalGeneric<ReduceType::kProd>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kProd); break; case ReduceType::kMax: - evalGeneric<ReduceType::kMax>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMax); break; case ReduceType::kMin: - evalGeneric<ReduceType::kMin>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMin); break; case ReduceType::kAny: - evalGeneric<ReduceType::kAny>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAny); break; case ReduceType::kAll: - evalGeneric<ReduceType::kAll>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll); break; default: throw std::runtime_error{"ReduceSum: Unsupported reduce type"}; } } +void ReduceLayer::run() +{ + const auto axes = getReducerAxes(_axes); + _kernel(_input, _output, axes); +} + } // namespace ops } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.h b/runtime/onert/backend/cpu/ops/ReduceLayer.h index 8e7bcdb07..332d399bd 100644 --- a/runtime/onert/backend/cpu/ops/ReduceLayer.h +++ b/runtime/onert/backend/cpu/ops/ReduceLayer.h @@ -65,10 +65,11 @@ private: const IPortableTensor *_input; const IPortableTensor *_axes; IPortableTensor *_output; - ReduceType _reduceType; - bool _keep_dims; std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel; + std::function<void(const IPortableTensor *input, IPortableTensor *output, + const std::vector<int> &axes)> + _kernel; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.h b/runtime/onert/backend/cpu/ops/RoundLayer.h deleted file mode 100644 index fc6a46c0d..000000000 --- a/runtime/onert/backend/cpu/ops/RoundLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
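
The ReduceLayer rewrite above moves all type and reduce-type dispatch out of run() and into configure(), caching the chosen kernel in a std::function member so run() is a single indirect call. A minimal sketch of that dispatch-hoisting pattern, with illustrative names rather than the onert API:

#include <functional>
#include <stdexcept>
#include <vector>

enum class ReduceType
{
  kSum,
  kProd
};

// Resolve the reduce type once and return a reusable kernel; the
// per-invocation data stays a parameter of the returned function.
template <typename T> std::function<T(const std::vector<T> &)> makeKernel(ReduceType type)
{
  switch (type)
  {
    case ReduceType::kSum:
      return [](const std::vector<T> &v) {
        T acc = static_cast<T>(0);
        for (const T &x : v)
          acc = acc + x;
        return acc;
      };
    case ReduceType::kProd:
      return [](const std::vector<T> &v) {
        T acc = static_cast<T>(1);
        for (const T &x : v)
          acc = acc * x;
        return acc;
      };
    default:
      throw std::runtime_error{"unsupported reduce type"};
  }
}

A layer following this pattern calls makeKernel once in configure() and stores the result; run() then only invokes the stored function, which is what the _kernel member above does with input, output, and axes as the remaining bound-at-call parameters.
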
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class RoundLayer : public ::onert::exec::IFunction -{ -public: - RoundLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void roundFloat32(); - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc b/runtime/onert/backend/cpu/ops/RsqrtLayer.cc deleted file mode 100644 index 0bd468f96..000000000 --- a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "RsqrtLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -RsqrtLayer::RsqrtLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void RsqrtLayer::rsqrtFloat32() -{ - nnfw::cker::Rsqrt(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void RsqrtLayer::rsqrtQuant8() { throw std::runtime_error{"NYI : QASYMM8 not supported"}; } - -void RsqrtLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void RsqrtLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - rsqrtFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - rsqrtQuant8(); - } - else - { - throw std::runtime_error{"Rsqrt: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.h b/runtime/onert/backend/cpu/ops/RsqrtLayer.h deleted file mode 100644 index 49abbb08d..000000000 --- a/runtime/onert/backend/cpu/ops/RsqrtLayer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class RsqrtLayer : public ::onert::exec::IFunction -{ -public: - RsqrtLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void rsqrtFloat32(); - void rsqrtQuant8(); - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/SinLayer.cc b/runtime/onert/backend/cpu/ops/SinLayer.cc deleted file mode 100644 index 2a6b11753..000000000 --- a/runtime/onert/backend/cpu/ops/SinLayer.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "SinLayer.h" -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -SinLayer::SinLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void SinLayer::sinFloat32() -{ - nnfw::cker::Sin(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void SinLayer::sinQuant8() { throw std::runtime_error{"NYI"}; } - -void SinLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void SinLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - sinFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - sinQuant8(); - } - else - { - throw std::runtime_error{"Sin: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/SinLayer.h b/runtime/onert/backend/cpu/ops/SinLayer.h deleted file mode 100644 index 348350f41..000000000 --- a/runtime/onert/backend/cpu/ops/SinLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class SinLayer : public ::onert::exec::IFunction -{ -public: - SinLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void sinFloat32(); - void sinQuant8(); - - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc index 6e2bb584a..095e67abc 100644 --- a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc @@ -34,55 +34,23 @@ SoftMaxLayer::SoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.0) // DO NOTHING } -// Performs softmax along the input of size (input_size * batch_size). -void Softmax(const float *in, const int input_size, const int batch_size, const float beta, - float *out) +void SoftMaxLayer::softmaxFloat32() { - assert(input_size > 0); - - // For each batch - for (int b = 0; b < batch_size; b++) + if (getNumberOfDimensions(_input) == 1) { - // Find the max coeff. - float max_coeff = in[0]; - for (int i = 1; i < input_size; i++) - { - if (in[i] > max_coeff) - max_coeff = in[i]; - } - - // Compute the normalized sum of exps. - float exp_sum = 0.0; - for (int i = 0; i < input_size; i++) - { - out[i] = std::exp((in[i] - max_coeff) * beta); - exp_sum += out[i]; - } - - // Divide by the sum of exps. - float reciprocal_sum_exp = 1.f / exp_sum; - for (int i = 0; i < input_size; i++) - { - out[i] *= reciprocal_sum_exp; - } - - // Advance in and out pointers for the next batch. - in += input_size; - out += input_size; + uint32_t input_size = getNumberOfElements(_input); + nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta, + reinterpret_cast<float *>(_output->buffer())); } -} - -void SoftMaxLayer::softmaxFloat32() -{ - if (getNumberOfDimensions(_input) == 2) + else if (getNumberOfDimensions(_input) == 2) { uint32_t batch_size = getSizeOfDimension(_input, 0); if (batch_size == 0) throw std::runtime_error("batch_size should not be 0"); uint32_t input_size = getNumberOfElements(_input) / batch_size; - Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, _beta, - reinterpret_cast<float *>(_output->buffer())); + nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, + _beta, reinterpret_cast<float *>(_output->buffer())); } else if (getNumberOfDimensions(_input) == 4) { @@ -94,7 +62,7 @@ void SoftMaxLayer::softmaxFloat32() } else { - throw std::runtime_error{"only 2D and 4D tensors supported"}; + throw std::runtime_error{"only 1D, 2D and 4D tensors supported"}; } } diff --git a/runtime/onert/backend/cpu/ops/SubLayer.cc b/runtime/onert/backend/cpu/ops/SubLayer.cc deleted file mode 100644 index 597d52952..000000000 --- a/runtime/onert/backend/cpu/ops/SubLayer.cc +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
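
The hand-rolled Softmax deleted above implements the usual numerically stable formulation, now delegated to nnfw::cker::Softmax with the 1-D case handled as a batch of one. For reference, a self-contained version of the same math (plain C++, not cker's exact signature):

#include <algorithm>
#include <cassert>
#include <cmath>

// Softmax over one row of `size` values. Subtracting the row maximum
// before exponentiating keeps std::exp within range (stability trick).
void softmax(const float *in, int size, float beta, float *out)
{
  assert(size > 0);
  const float max_coeff = *std::max_element(in, in + size);

  float exp_sum = 0.0f;
  for (int i = 0; i < size; ++i)
  {
    out[i] = std::exp((in[i] - max_coeff) * beta);
    exp_sum += out[i];
  }

  const float reciprocal = 1.0f / exp_sum;
  for (int i = 0; i < size; ++i)
    out[i] *= reciprocal;
}
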
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "SubLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void SubLayer::subFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void SubLayer::subInt32() -{ - int32_t output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); -} - -void SubLayer::subQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - // Parameters for scaled quantized computation - op_params.left_shift = 20; - // Zero-points of input and output tensors - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - 
assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255)); - assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255)); - assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255)); - - // Compute normalized scale for _lhs and _rhs values, - // and represent in 32-bit fixed point - const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale()); - const double real_lhs_scale = _lhs->data_scale() / norm_max_scale; - const double real_rhs_scale = _rhs->data_scale() / norm_max_scale; - // output scale is used to normalize final result, so we invert the scale here - const double real_output_scale = - norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift)); - - // Represent the scales as fixed int32_t multipliers, and int32_t shifts - QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift); - QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift); - op_params.input2_multiplier *= -1; - QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift); - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void SubLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void SubLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - subFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - subQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - subInt32(); - } - else - { - throw std::runtime_error{"Sub: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/SubLayer.h b/runtime/onert/backend/cpu/ops/SubLayer.h deleted file mode 100644 index 86f32ca6d..000000000 --- a/runtime/onert/backend/cpu/ops/SubLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
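
The deleted subQuant8() above follows the standard asymmetric-quantization recipe: each uint8 value q represents scale * (q - zero_point), so both inputs are rescaled into a shared domain, subtracted, and requantized; the production kernel does this with int32 fixed-point multipliers derived by QuantizeMultiplier. A float-domain sketch of the same arithmetic, deliberately ignoring the fixed-point machinery and activation clamping:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Per-tensor asymmetric quantization parameters.
struct QuantParams
{
  float scale;
  int32_t zero_point;
};

// Reference uint8 subtraction: dequantize, subtract, requantize.
uint8_t subQuant8Ref(uint8_t lhs, const QuantParams &lq, uint8_t rhs, const QuantParams &rq,
                     const QuantParams &oq)
{
  const float real = lq.scale * (lhs - lq.zero_point) - rq.scale * (rhs - rq.zero_point);
  const int32_t q = static_cast<int32_t>(std::round(real / oq.scale)) + oq.zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}
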
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class SubLayer : public ::onert::exec::IFunction -{ -public: - SubLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void subFloat32(); - - void subQuant8(); - - void subInt32(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.cc b/runtime/onert/backend/cpu/ops/TanhLayer.cc deleted file mode 100644 index 910ac1f41..000000000 --- a/runtime/onert/backend/cpu/ops/TanhLayer.cc +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "TanhLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Tanh.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -TanhLayer::TanhLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void TanhLayer::PopulateLookupTable() -{ - const auto input_scale = static_cast<double>(_input->data_scale()); - const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); - const auto output_scale = static_cast<double>(_output->data_scale()); - const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); - const float inverse_scale = 1 / output_scale; - int32_t maxval = std::numeric_limits<uint8_t>::max(); - int32_t minval = std::numeric_limits<uint8_t>::min(); - for (int32_t val = minval; val <= maxval; ++val) - { - const float dequantized = input_scale * (val - input_zero_point); - const float transformed = std::tanh(dequantized); - const float rescaled = std::round(transformed * inverse_scale); - const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); - _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); - } -} - -void TanhLayer::tanhFloat32() -{ - nnfw::cker::Tanh(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void TanhLayer::tanhQuant8() -{ - const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output)); - const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer()); - uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer()); - - for (int i = 0; i < size; ++i) - { - output_data[i] = _table[input_data[i]]; - } -} - -void TanhLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; - if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - PopulateLookupTable(); - } -} - -void TanhLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - tanhFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - tanhQuant8(); - } - else - { - throw std::runtime_error{"Tanh: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc b/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc deleted file mode 100644 index ae8084518..000000000 --- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
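
The deleted TanhLayer above uses the standard lookup-table trick for uint8 activations: a quantized input takes only 256 distinct values, so the whole transfer function can be precomputed once in configure() and run() becomes one table load per element. A generic sketch of building such a table (hypothetical helper, not onert code):

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <functional>

// Maps every possible uint8 input through `f` in the real-valued domain,
// then requantizes to the output parameters, clamping to [0, 255].
std::array<uint8_t, 256> buildLut(const std::function<float(float)> &f, float in_scale,
                                  int32_t in_zero_point, float out_scale, int32_t out_zero_point)
{
  std::array<uint8_t, 256> table{};
  for (int32_t val = 0; val <= 255; ++val)
  {
    const float dequantized = in_scale * (val - in_zero_point);
    const float transformed = f(dequantized);
    const int32_t quantized =
        static_cast<int32_t>(std::round(transformed / out_scale)) + out_zero_point;
    table[val] = static_cast<uint8_t>(std::max(0, std::min(255, quantized)));
  }
  return table;
}

With f = [](float x) { return std::tanh(x); } this reproduces what PopulateLookupTable above computes inline.
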
- */ - -#include "ZerosLikeLayer.h" - -#include "OperationUtils.h" - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -ZerosLikeLayer::ZerosLikeLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ZerosLikeLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ZerosLikeLayer::run() -{ - if (!HaveSameShapes(_input, _output)) - throw std::runtime_error{"ZerosLike: input and output shape don't match."}; - - auto element_size = getTensorShape(_input).FlatSize(); - - switch (_input->data_type()) - { - case OperandType::FLOAT32: - memset(reinterpret_cast<float *>(_output->buffer()), 0, element_size * sizeof(float)); - break; - case OperandType::INT32: - memset(reinterpret_cast<int32_t *>(_output->buffer()), 0, element_size * sizeof(int32_t)); - break; - default: - throw std::runtime_error{"ZerosLike: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert
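
The deleted ZerosLikeLayer closing this section relies on the all-zero byte pattern encoding the value zero for both IEEE-754 FLOAT32 and two's-complement INT32, which is why one memset per buffer suffices. A minimal sketch of that reasoning:

#include <cstring>
#include <vector>

// memset is type-agnostic here because 0x00...00 is the value zero for
// IEEE-754 floats and for two's-complement integers alike.
template <typename T> void zerosLike(const std::vector<T> &input, std::vector<T> &output)
{
  output.resize(input.size());
  std::memset(output.data(), 0, output.size() * sizeof(T));
}
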