path: root/runtime/onert/backend
author     Chunseok Lee <chunseok.lee@samsung.com>  2020-09-05 21:49:46 +0900
committer  Chunseok Lee <chunseok.lee@samsung.com>  2020-09-05 21:49:46 +0900
commit     74476a2d0296bdad70a2f7f90bc7419a8b05bffd (patch)
tree       3f991636c1e9423d38eb16a384c20b569b0d678e /runtime/onert/backend
parent     042b262b3633b6c0f577aed6cb4b980ad0c1dcf3 (diff)
download   nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.tar.gz
           nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.tar.bz2
           nnfw-74476a2d0296bdad70a2f7f90bc7419a8b05bffd.zip
Diffstat (limited to 'runtime/onert/backend')
-rw-r--r--  runtime/onert/backend/acl_cl/Backend.h  10
-rw-r--r--  runtime/onert/backend/acl_cl/ConstantInitializer.cc  99
-rw-r--r--  runtime/onert/backend/acl_cl/ConstantInitializer.h  32
-rw-r--r--  runtime/onert/backend/acl_cl/KernelGenerator.cc  1238
-rw-r--r--  runtime/onert/backend/acl_cl/KernelGenerator.h  37
-rw-r--r--  runtime/onert/backend/acl_cl/Optimizer.cc  2
-rw-r--r--  runtime/onert/backend/acl_cl/TensorManager.h  2
-rw-r--r--  runtime/onert/backend/acl_common/AclConstantInitializer.cc  128
-rw-r--r--  runtime/onert/backend/acl_common/AclConstantInitializer.h  61
-rw-r--r--  runtime/onert/backend/acl_common/AclFunction.h  6
-rw-r--r--  runtime/onert/backend/acl_common/AclKernelGen.h  149
-rw-r--r--  runtime/onert/backend/acl_common/AclTensorBuilder.h  42
-rw-r--r--  runtime/onert/backend/acl_common/AclTensorRegistry.h  59
-rw-r--r--  runtime/onert/backend/acl_common/Convert.cc  65
-rw-r--r--  runtime/onert/backend/acl_common/Convert.h  7
-rw-r--r--  runtime/onert/backend/acl_neon/Backend.h  9
-rw-r--r--  runtime/onert/backend/acl_neon/ConstantInitializer.cc  97
-rw-r--r--  runtime/onert/backend/acl_neon/ConstantInitializer.h  26
-rw-r--r--  runtime/onert/backend/acl_neon/KernelGenerator.cc  1249
-rw-r--r--  runtime/onert/backend/acl_neon/KernelGenerator.h  36
-rw-r--r--  runtime/onert/backend/acl_neon/Optimizer.cc  2
-rw-r--r--  runtime/onert/backend/acl_neon/TensorManager.h  2
-rw-r--r--  runtime/onert/backend/cpu/Backend.h  8
-rw-r--r--  runtime/onert/backend/cpu/BackendContext.h  6
-rw-r--r--  runtime/onert/backend/cpu/ConstantInitializer.cc  4
-rw-r--r--  runtime/onert/backend/cpu/ConstantInitializer.h  8
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.cc  851
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.h  34
-rw-r--r--  runtime/onert/backend/cpu/TensorBuilder.cc  31
-rw-r--r--  runtime/onert/backend/cpu/TensorBuilder.h  26
-rw-r--r--  runtime/onert/backend/cpu/ops/AbsLayer.cc  70
-rw-r--r--  runtime/onert/backend/cpu/ops/AbsLayer.h  57
-rw-r--r--  runtime/onert/backend/cpu/ops/AddLayer.cc  166
-rw-r--r--  runtime/onert/backend/cpu/ops/AddLayer.h  67
-rw-r--r--  runtime/onert/backend/cpu/ops/AvgPoolLayer.cc  118
-rw-r--r--  runtime/onert/backend/cpu/ops/AvgPoolLayer.h  75
-rw-r--r--  runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc  225
-rw-r--r--  runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h (renamed from runtime/onert/backend/cpu/ops/DivLayer.h)  28
-rw-r--r--  runtime/onert/backend/cpu/ops/CastLayer.cc  112
-rw-r--r--  runtime/onert/backend/cpu/ops/CastLayer.h  57
-rw-r--r--  runtime/onert/backend/cpu/ops/ConvolutionLayer.cc  21
-rw-r--r--  runtime/onert/backend/cpu/ops/ConvolutionLayer.h  5
-rw-r--r--  runtime/onert/backend/cpu/ops/CosLayer.cc  68
-rw-r--r--  runtime/onert/backend/cpu/ops/CosLayer.h  54
-rw-r--r--  runtime/onert/backend/cpu/ops/DivLayer.cc  95
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc  173
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h (renamed from runtime/onert/backend/cpu/ops/TanhLayer.h)  29
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc  151
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h (renamed from runtime/onert/backend/cpu/ops/MaxLayer.h)  26
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc  336
-rw-r--r--  runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h (renamed from runtime/onert/backend/cpu/ops/ReLU6Layer.h)  38
-rw-r--r--  runtime/onert/backend/cpu/ops/ExpLayer.cc  74
-rw-r--r--  runtime/onert/backend/cpu/ops/ExpLayer.h  57
-rw-r--r--  runtime/onert/backend/cpu/ops/LogLayer.cc  70
-rw-r--r--  runtime/onert/backend/cpu/ops/LogLayer.h  57
-rw-r--r--  runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc  26
-rw-r--r--  runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h  3
-rw-r--r--  runtime/onert/backend/cpu/ops/LogicalNotLayer.cc  64
-rw-r--r--  runtime/onert/backend/cpu/ops/LogicalNotLayer.h  56
-rw-r--r--  runtime/onert/backend/cpu/ops/LogicalOrLayer.cc  76
-rw-r--r--  runtime/onert/backend/cpu/ops/LogicalOrLayer.h  59
-rw-r--r--  runtime/onert/backend/cpu/ops/LogisticLayer.cc  108
-rw-r--r--  runtime/onert/backend/cpu/ops/LogisticLayer.h  60
-rw-r--r--  runtime/onert/backend/cpu/ops/MaxLayer.cc  85
-rw-r--r--  runtime/onert/backend/cpu/ops/MaxPoolLayer.cc  115
-rw-r--r--  runtime/onert/backend/cpu/ops/MinLayer.cc  89
-rw-r--r--  runtime/onert/backend/cpu/ops/MinLayer.h  61
-rw-r--r--  runtime/onert/backend/cpu/ops/MulLayer.cc  116
-rw-r--r--  runtime/onert/backend/cpu/ops/MulLayer.h  65
-rw-r--r--  runtime/onert/backend/cpu/ops/NegLayer.cc  70
-rw-r--r--  runtime/onert/backend/cpu/ops/NegLayer.h  57
-rw-r--r--  runtime/onert/backend/cpu/ops/PoolLayer.cc  132
-rw-r--r--  runtime/onert/backend/cpu/ops/PoolLayer.h (renamed from runtime/onert/backend/cpu/ops/MaxPoolLayer.h)  37
-rw-r--r--  runtime/onert/backend/cpu/ops/QuantizeLayer.cc  63
-rw-r--r--  runtime/onert/backend/cpu/ops/QuantizeLayer.h  56
-rw-r--r--  runtime/onert/backend/cpu/ops/RankLayer.cc (renamed from runtime/onert/backend/cpu/ops/RoundLayer.cc)  24
-rw-r--r--  runtime/onert/backend/cpu/ops/RankLayer.h (renamed from runtime/onert/backend/cpu/ops/ZerosLikeLayer.h)  12
-rw-r--r--  runtime/onert/backend/cpu/ops/ReLU6Layer.cc  74
-rw-r--r--  runtime/onert/backend/cpu/ops/ReLULayer.cc  74
-rw-r--r--  runtime/onert/backend/cpu/ops/ReLULayer.h  57
-rw-r--r--  runtime/onert/backend/cpu/ops/ReduceLayer.cc  90
-rw-r--r--  runtime/onert/backend/cpu/ops/ReduceLayer.h  5
-rw-r--r--  runtime/onert/backend/cpu/ops/RoundLayer.h  54
-rw-r--r--  runtime/onert/backend/cpu/ops/RsqrtLayer.cc  69
-rw-r--r--  runtime/onert/backend/cpu/ops/RsqrtLayer.h  53
-rw-r--r--  runtime/onert/backend/cpu/ops/SinLayer.cc  68
-rw-r--r--  runtime/onert/backend/cpu/ops/SinLayer.h  54
-rw-r--r--  runtime/onert/backend/cpu/ops/SoftMaxLayer.cc  50
-rw-r--r--  runtime/onert/backend/cpu/ops/SubLayer.cc  162
-rw-r--r--  runtime/onert/backend/cpu/ops/SubLayer.h  67
-rw-r--r--  runtime/onert/backend/cpu/ops/TanhLayer.cc  103
-rw-r--r--  runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc  63
92 files changed, 2926 insertions, 6106 deletions
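The theme running through the hunks below: the ACL backends stop looking tensors up through the TensorBuilder (_tensor_builder->at(ind)) and instead go through a new acl_common::AclTensorRegistry (_tensor_reg->getAclTensor(ind)), the AclClFunction/asAclClFunction wrappers are unified into AclFunction/asAclFunction, and the per-operation visitors (Add/Sub/Mul/Div, Tanh/Logistic/ReLU/ReLU1/ReLU6, Exp/Cast/RSQRT, MaxPool2D/AvgPool2D) collapse into the generic BinaryArithmetic, ElementwiseActivation, ElementwiseBinary, ElementwiseUnary and Pool2D visitors. The registry itself lives in the new AclTensorRegistry.h (59 lines, listed above); the following is only a sketch of its likely shape, inferred from how getAclTensor() is used in these hunks, and the member names and tensor-manager call are assumptions, not the committed code.

// Hypothetical sketch, not runtime/onert/backend/acl_common/AclTensorRegistry.h itself:
// a thin adapter that resolves operand indices to the backend's ACL tensors via the
// tensor manager, so kernel generators and constant initializers no longer need the
// TensorBuilder for lookups.
template <typename T_AclTensorManager>
class AclTensorRegistry : public backend::ITensorRegistry
{
public:
  AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}

  // Used throughout the KernelGenerator hunks below, e.g.
  //   auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
  // Assumes the tensor manager exposes an at(index) lookup returning a shared_ptr.
  auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }

private:
  T_AclTensorManager *_tensor_mgr;
};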
diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h
index 8aaf516cd..5c5041378 100644
--- a/runtime/onert/backend/acl_cl/Backend.h
+++ b/runtime/onert/backend/acl_cl/Backend.h
@@ -25,6 +25,7 @@
#include "KernelGenerator.h"
#include "TensorManager.h"
#include "Optimizer.h"
+#include "AclTensorRegistry.h"
namespace onert
{
@@ -47,10 +48,13 @@ public:
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+ auto tm = createTensorManager(is_linear_executor);
+ auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
index d7f5f8031..31f1c10eb 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc
@@ -24,78 +24,17 @@ namespace acl_cl
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
- copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
- copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node)
{
copyInputInitialize(node, ir::operation::EmbeddingLookup::LOOKUPS);
}
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
- copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::Gather &node)
{
copyInputInitialize(node, ir::operation::Gather::INDICES);
@@ -107,33 +46,6 @@ void ConstantInitializer::visit(const ir::operation::HashtableLookup &node)
copyInputInitialize(node, ir::operation::HashtableLookup::KEYS);
}
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
- copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
{
const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
@@ -184,13 +96,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
}
}
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL);
- const auto &kernel_obj = _operands.at(kernel_index);
- registerPermuteInitializer(kernel_index, kernel_obj);
-}
-
} // namespace acl_cl
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h
index c51f72b11..4f894fd31 100644
--- a/runtime/onert/backend/acl_cl/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h
@@ -17,9 +17,7 @@
#ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
namespace onert
{
@@ -28,32 +26,18 @@ namespace backend
namespace acl_cl
{
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::HashtableLookup &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- void copyInputInitialize(const ir::Operation &node, uint32_t index);
- void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ using acl_common::AclConstantInitializer::visit;
+ void visit(const ir::operation::EmbeddingLookup &) final;
+ void visit(const ir::operation::Gather &) final;
+ void visit(const ir::operation::HashtableLookup &) final;
+ void visit(const ir::operation::SpaceToBatchND &) final;
};
} // namespace acl_cl
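The KernelGenerator.cc hunks that follow replace every make_unique-then-configure pair with a single acl_common::generateLayer<Layer>(...) call. generateLayer is defined in AclKernelGen.h (see the diffstat); the sketch below is a plausible shape for it, assuming it default-constructs the ACL function and forwards its arguments to configure(), plus an assumed overload for the call sites that pass an internal buffer manager first (CLConvolutionLayer, CLSoftmaxLayer, CLReduceOperation). Both signatures are guesses, not the committed implementation.

#include <memory>
#include <utility>
// Also requires arm_compute/runtime/IMemoryManager.h for the second overload.

// Hypothetical sketch of acl_common::generateLayer.
template <typename Layer, typename... Args>
std::unique_ptr<Layer> generateLayer(Args &&... args)
{
  auto fn = std::make_unique<Layer>();        // default-construct the ACL function
  fn->configure(std::forward<Args>(args)...); // forward everything to configure()
  return fn;
}

// Assumed overload for layers constructed with a memory manager, matching calls like
//   generateLayer<arm_compute::CLConvolutionLayer>(
//       _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ...);
template <typename Layer, typename... Args>
std::unique_ptr<Layer>
generateLayer(std::shared_ptr<arm_compute::IMemoryManager> mem_mgr, Args &&... args)
{
  auto fn = std::make_unique<Layer>(mem_mgr); // memory manager goes to the constructor
  fn->configure(std::forward<Args>(args)...);
  return fn;
}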
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc
index a84f983b4..94489253d 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc
@@ -40,15 +40,16 @@ namespace backend
namespace acl_cl
{
-using ::onert::backend::acl_common::asAclClFunction;
+using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
- ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>;
+ ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>;
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
- const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
@@ -77,51 +78,69 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
assert(_ctx.at(block_size_index).data());
- auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ const auto activation = node.param().activation;
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_tensor->data_type() == ofm_tensor->data_type())
- {
- auto l = std::make_unique<::arm_compute::CLCopy>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+ const auto act_info = acl_common::asActivationLayerInfo(activation);
- fn = std::move(l);
- }
- else
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().arithmetic_type)
{
- auto l = std::make_unique<::arm_compute::CLCast>();
-
- // TODO Support converting float to int32 as round down
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
- fn = std::move(l);
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE, act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+ act_info);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+ break;
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
@@ -145,22 +164,20 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
- auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
- ::arm_compute::Size2D(1U, 1U), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
@@ -185,50 +202,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
- auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, multiplier, act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
}
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -250,70 +240,44 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_tensor = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
std::vector<::arm_compute::ICLTensor *> input_tensors;
for (auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+ input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- auto l = std::make_unique<::arm_compute::CLCopy>();
- l->configure(input_tensors.at(0), output_tensor->handle());
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0),
+ output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_tensor->handle(), fixed_axis);
- fn = std::move(l);
+ fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>(
+ input_tensors, output_tensor->handle(), fixed_axis);
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->at(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
const auto activation = node.param().activation;
- auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
+ auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor,
::arm_compute::CLFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Reduce &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -322,8 +286,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto keep_dims{node.param().keep_dims};
const auto reduce_type = node.param().reduce_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
@@ -334,29 +298,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
std::unique_ptr<arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- auto l = std::make_unique<::arm_compute::CLReduceMean>();
-
const auto acl_axes =
acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes,
+ keep_dims, output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::CLReduceOperation>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
- l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
- acl_common::convertReduceType(reduce_type));
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLReduceOperation>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type));
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -364,8 +320,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// NOTE This operation must not be changed the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -376,13 +332,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
UNUSED_RELEASE(frontend_layout);
UNUSED_RELEASE(backend_layout);
- auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Squeeze &node)
@@ -398,32 +351,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
- fn->configure(input_tensor->handle(), output_tensor->handle());
- auto acl_fn = asAclClFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -433,17 +365,14 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -453,8 +382,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -506,13 +435,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
ends_set.set(i, ends[i]);
}
- auto fn = std::make_unique<::arm_compute::CLSlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -523,8 +449,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -597,14 +523,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
strides_set.set(i, strides[i]);
}
- auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -615,8 +538,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto rank = _ctx.at(ifm_idx).shape().rank();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -625,93 +548,168 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
rank, pv, frontend_layout, backend_layout);
- auto fn = std::make_unique<::arm_compute::CLPermute>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), backend_pv);
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Sub &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
+ const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
+ const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+ node.param().op_type, node.param().alpha, node.param().beta);
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
+ auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Div &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(),
+ arm_compute::BinaryLogicalOperation::AND);
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ default:
+ {
+ std::string err_msg("acl_cl KernelGenerator : " + node.name() +
+ "is not elementwise-binary operations");
+ assert(false && err_msg.c_str());
+ break;
+ }
+ }
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::CLExpLayer>();
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ {
+ if (input_tensor->data_type() == output_tensor->data_type())
+ {
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(),
+ output_tensor->handle());
+ ;
+ }
+ else
+ {
+ // TODO Support converting float to int32 as round down
+ fn = acl_common::generateLayer<arm_compute::CLCast>(
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ }
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::FLOOR:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ {
+ fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
- auto acl_fn = asAclClFunction(std::move(fn));
+ fn = acl_common::generateLayer<arm_compute::CLActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + "is not supported yet");
+ break;
+ }
+ }
+
+ auto acl_fn = asAclFunction(std::move(fn));
_return_fn = std::move(acl_fn);
}
@@ -721,16 +719,13 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -740,67 +735,25 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto gamma_tensor = _tensor_builder->at(gamma_index).get();
- auto beta_tensor = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
- auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
- beta_tensor->handle(), epsilon);
+ auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- ::arm_compute::BinaryLogicalOperation::AND);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
- _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
- ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
+ _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor,
+ ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg);
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -811,18 +764,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLComparison>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLComparison>(
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -836,26 +786,24 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
std::vector<arm_compute::ICLTensor *> inputs;
for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+ inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
if (axis < 0)
axis += output_rank;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLStackLayer>();
-
// Disable applied dim_correction
std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes;
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
@@ -866,7 +814,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
}
}
- fn->configure(inputs, axis, output);
+ auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output);
// Revert disabling applied dim_correction
assert(inputs.size() == orig_inputs_acl_tensor_shapes.size());
@@ -875,7 +823,21 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i));
}
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
+}
+
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
+ node, _ctx, _tensor_reg, _current_op_seq_layout,
+ acl_common::convertPoolType(node.param().op_type));
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ const auto activation = node.param().activation;
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Permute &node)
@@ -883,8 +845,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -895,70 +857,23 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
// WHCN -> CWHN
pv = arm_compute::PermutationVector{2, 0, 1};
- auto l = std::make_unique<::arm_compute::CLPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
{
// CWHN -> WHCN
pv = arm_compute::PermutationVector{1, 2, 0};
- auto l = std::make_unique<::arm_compute::CLPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else
{
- auto l = std::make_unique<::arm_compute::CLCopy>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), ofm_tensor->handle());
}
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::CLActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -967,58 +882,32 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- auto fn = std::make_unique<::arm_compute::CLScale>();
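+ // Bilinear resize is lowered to CLScale with BILINEAR interpolation, replicate
+ // border handling and top-left sampling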
+ auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT);
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
+void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
+ const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)};
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::CLScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(),
+ ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE,
+ ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1036,43 +925,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto weights_tensor = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
- auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
- copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
- _return_fn = asAclClFunction(std::move(copy_layer));
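+ // First copy the incoming hidden state into the hidden-state output tensor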
+ auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>(
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ _return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_tensor->handle(), weights_tensor->handle(),
- recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Floor &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLFloor>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1083,24 +954,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
- auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- std::unique_ptr<::arm_compute::IFunction> fn;
-
- auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
- l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
- fn = std::move(l);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1110,29 +976,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
+ auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclClFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
@@ -1141,17 +991,14 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
-
- fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>(
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
@@ -1173,19 +1020,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
@@ -1197,21 +1041,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hits_tensor = _tensor_builder->at(hits_index).get();
-
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto keys_tensor = _tensor_builder->at(keys_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
- fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>(
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::PReLU &node)
@@ -1220,17 +1061,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto alpha_tensor = _tensor_builder->at(alpha_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLPReluLayer>();
-
- fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>(
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1258,77 +1096,18 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
- auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
- tconv_info, invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBitwiseOr>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
+ auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal,
+ invalid_vertical);
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
@@ -1337,17 +1116,14 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
+ auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TopKV2 &node)
@@ -1364,17 +1140,14 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node)
const auto k = node.param().k;
- auto values_tensor = _tensor_builder->at(outputValues_index).get();
- auto indices_tensor = _tensor_builder->at(outputIndices_index).get();
- auto input_tensor = _tensor_builder->at(inputData_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get();
- auto fn = std::make_unique<::arm_compute::CLTopKV2>();
+ auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>(
+ input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
- fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Gather &node)
@@ -1389,9 +1162,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto indices_tensor = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
// NOTE The frontend layout and backend layout must be the same for this operation.
// If not the same, we have to add a stage(?) to perform permutation of output tensor. It
@@ -1407,8 +1180,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
- auto fn = std::make_unique<::arm_compute::CLGatherEx>();
-
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
assert(n == ifm_tensor->num_dimensions());
@@ -1433,52 +1204,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>(
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
// Revert disabling applied dim_correction
ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLNeg>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ArgMax &node)
@@ -1491,8 +1224,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
assert((ifm_shape.rank() - 1) == ofm_shape.rank());
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
auto frontend_layout = _current_op_seq_layout;
auto backend_layout = ifm_tensor->layout();
@@ -1506,31 +1239,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
auto acl_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>(
+ ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
+ ::arm_compute::ReductionOperation::ARG_IDX_MAX);
- fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
- ::arm_compute::ReductionOperation::ARG_IDX_MAX);
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
@@ -1544,19 +1257,16 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
@@ -1567,16 +1277,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto acl_fn = asAclClFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>(
+ input_tensor->handle(), output_tensor->handle(), block_size);
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1590,10 +1297,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
std::vector<arm_compute::ICLTensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -1602,11 +1309,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::CLSplit>();
-
- fn->configure(ifm_tensor->handle(), output_tensors, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1620,13 +1326,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_builder->at(input_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
std::vector<arm_compute::ICLTensor *> outputs;
for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+ outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
@@ -1636,7 +1342,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_builder->at(output_index);
+ const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
assert(output_rank == output_tensor->num_dimensions());
if (output_rank != output_tensor->info()->num_dimensions())
@@ -1647,11 +1353,9 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
}
}
- auto fn = std::make_unique<::arm_compute::CLUnstack>();
-
- fn->configure(input, outputs, axis);
+ auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis);
- _return_fn = asAclClFunction(std::move(fn));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pad &node)
@@ -1669,11 +1373,11 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
@@ -1685,11 +1389,10 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
}
- auto fn = std::make_unique<::arm_compute::CLPadLayer>();
// Disable applied dim_correction
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
{
@@ -1698,50 +1401,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
_ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
}
- fn->configure(input, output, padding_list, pixel_value);
+ auto fn =
+ acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value);
// Do not revert disabling applied dim_correction; CLPadKernel has a CL kernel for 4-dimension tensors
// Reverting would produce a mismatch of results
- _return_fn = asAclClFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLElementwiseMin>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLElementwiseMax>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
@@ -1749,17 +1415,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
- 0);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
@@ -1767,17 +1429,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
- 0);
+ auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0);
- auto acl_fn = asAclClFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
} // namespace acl_cl
diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h
index 1e3b06489..d188d6d83 100644
--- a/runtime/onert/backend/acl_cl/KernelGenerator.h
+++ b/runtime/onert/backend/acl_cl/KernelGenerator.h
@@ -21,6 +21,8 @@
#include "ir/Operands.h"
#include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
namespace onert
{
@@ -33,70 +35,52 @@ class KernelGenerator : public IKernelGenerator
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::Mul &) override;
void visit(const ir::operation::Reduce &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::Transpose &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Cast &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::InstanceNorm &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::LogicalAnd &) override;
void visit(const ir::operation::LSTM &) override;
void visit(const ir::operation::Pack &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
- void visit(const ir::operation::RSQRT &) override;
- void visit(const ir::operation::ReLU &) override;
void visit(const ir::operation::ResizeBilinear &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
+ void visit(const ir::operation::ResizeNearestNeighbor &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::Floor &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
- void visit(const ir::operation::L2Pool2D &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::TransposeConv &) override;
- void visit(const ir::operation::SQRT &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::LogicalNot &) override;
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::TopKV2 &) override;
void visit(const ir::operation::Gather &) override;
- void visit(const ir::operation::Neg &) override;
- void visit(const ir::operation::Abs &) override;
void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::Dequantize &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::Split &) override;
void visit(const ir::operation::Unpack &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
void visit(const ir::operation::ConvertFp32ToFp16 &) override;
void visit(const ir::operation::ConvertFp16ToFp32 &) override;
@@ -104,6 +88,7 @@ private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
ir::Layout _current_op_seq_layout;
};
diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc
index 6ba3143e8..9134d3fb8 100644
--- a/runtime/onert/backend/acl_cl/Optimizer.cc
+++ b/runtime/onert/backend/acl_cl/Optimizer.cc
@@ -19,7 +19,7 @@
#include "ParentInfo.h"
#include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
#include <util/logging.h>
#include "AclSubTensorAnalyzer.h"
diff --git a/runtime/onert/backend/acl_cl/TensorManager.h b/runtime/onert/backend/acl_cl/TensorManager.h
index bdbd0364e..ab295dbec 100644
--- a/runtime/onert/backend/acl_cl/TensorManager.h
+++ b/runtime/onert/backend/acl_cl/TensorManager.h
@@ -56,7 +56,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
using TensorManager =
acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>;
-TensorManager *createTensorManager(bool is_linear_executor)
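+// NOTE Defined inline because this factory lives in a header; presumably this avoids
+// duplicate-definition errors now that the header is included from multiple places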
+inline TensorManager *createTensorManager(bool is_linear_executor)
{
if (is_linear_executor)
{
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
new file mode 100644
index 000000000..6ad5b7b69
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AclConstantInitializer.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
+{
+ // DO NOTHING
+}
+
+void AclConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
+{
+ assert(node.getInputs().size() > index);
+
+ const auto &input_index = node.getInputs().at(index);
+ const auto &input_obj = _operands.at(input_index);
+ registerCopyInitializer(input_index, input_obj);
+}
+
+void AclConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
+{
+ assert(node.getInputs().size() > index);
+
+ const auto &input_index = node.getInputs().at(index);
+ const auto &input_obj = _operands.at(input_index);
+ registerPermuteInitializer(input_index, input_obj);
+}
+
+void AclConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
+{
+ const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
+ const auto &block_size_obj = _operands.at(block_size_index);
+
+ if (block_size_obj.isConstant())
+ {
+ _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
+ assert(model_obj.data());
+ const auto &shape = model_obj.shape();
+ const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
+ assert(model_obj.shape().rank() == 1);
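+ // Copies the 1-D block-size constant into the backend tensor in reverse element
+ // order (presumably to match the backend's axis ordering)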
+ obj.access([&](ITensor &tensor) {
+ for (size_t i = 0; i < shape.num_elements(); ++i)
+ {
+ const int32_t value = base[shape.num_elements() - i - 1];
+ int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
+ tensor.calcOffset({static_cast<int32_t>(i)}));
+ *into = value;
+ }
+ });
+ };
+ }
+}
+
+void AclConstantInitializer::visit(const ir::operation::Conv2D &node)
+{
+ permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
+ copyInputInitialize(node, ir::operation::Conv2D::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
+ copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::FullyConnected &node)
+{
+ copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
+ copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::LSTM &node)
+{
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
+ copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
+ copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::RNN &node)
+{
+ copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
+ copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
+ copyInputInitialize(node, ir::operation::RNN::BIAS);
+}
+
+void AclConstantInitializer::visit(const ir::operation::TransposeConv &node)
+{
+ permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
+}
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h
new file mode 100644
index 000000000..52f4c54cf
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
+#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
+
+#include <backend/IConstantInitializer.h>
+#include <ir/Operands.h>
+#include "AclTensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+class AclConstantInitializer : public IConstantInitializer
+{
+public:
+ AclConstantInitializer(const ir::Operands &operands,
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
+
+public:
+ void visit(const ir::operation::BatchToSpaceND &) override;
+ void visit(const ir::operation::Conv2D &) override;
+ void visit(const ir::operation::DepthwiseConv2D &) override;
+ void visit(const ir::operation::FullyConnected &) override;
+ void visit(const ir::operation::LSTM &) override;
+ void visit(const ir::operation::RNN &) override;
+ void visit(const ir::operation::TransposeConv &) override;
+
+protected:
+ void copyInputInitialize(const ir::Operation &node, uint32_t index);
+ void permuteInputInitialize(const ir::Operation &node, uint32_t index);
+
+private:
+ std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; }
+
+protected:
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
+};
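+
+// NOTE Backend-specific constant initializers (acl_cl / acl_neon) presumably derive
+// from this class and add any extra visit() overrides they need.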
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__
diff --git a/runtime/onert/backend/acl_common/AclFunction.h b/runtime/onert/backend/acl_common/AclFunction.h
index 85b18e847..94b65863a 100644
--- a/runtime/onert/backend/acl_common/AclFunction.h
+++ b/runtime/onert/backend/acl_common/AclFunction.h
@@ -47,12 +47,6 @@ private:
std::unique_ptr<::arm_compute::IFunction> _func;
};
-class AclClFunction : public AclFunction
-{
-public:
- using AclFunction::AclFunction;
-};
-
} // namespace acl_common
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h
index 9f7ce3764..372ce689e 100644
--- a/runtime/onert/backend/acl_common/AclKernelGen.h
+++ b/runtime/onert/backend/acl_common/AclKernelGen.h
@@ -30,11 +30,32 @@ namespace backend
namespace acl_common
{
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
+{
+ auto l = std::make_unique<Layer>();
+
+ l->configure(std::forward<Args>(args)...);
+
+ return l;
+}
+
+template <typename Layer, typename... Args>
+std::unique_ptr<arm_compute::IFunction>
+generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args)
+{
+ auto l = std::make_unique<Layer>(memory_manager);
+
+ l->configure(std::forward<Args>(args)...);
+
+ return l;
+}
+
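+// Illustrative usage (not part of this patch): these overloads collapse the old
+// "make_unique<Layer>() + configure(...)" pattern into a single call, e.g.
+//
+//   auto copy = generateLayer<arm_compute::CLCopy>(ifm->handle(), ofm->handle());
+//   auto rnn = generateLayer<arm_compute::CLRNNLayer>(mem_mgr, input, weights,
+//                                                     recurrent_weights, bias,
+//                                                     hidden_state_out, output, act_info);
+//
+// The second overload forwards the memory manager to the layer constructor before
+// calling configure() with the remaining arguments.
+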
template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
- typename T_TensorBuilder>
-std::unique_ptr<exec::IFunction>
-kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder)
+ typename T_TensorRegistry>
+std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node,
+ const ir::Operands &operands,
+ const std::shared_ptr<T_TensorRegistry> &tensor_reg)
{
// TODO Support dynamic rnn
// TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection.
@@ -117,43 +138,44 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
const auto projection_clip = projection_threshold;
assert(cell_clip >= 0.f && projection_clip >= 0.f);
- auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get();
- auto output_tensor = tensor_builder->at(output_index).get();
+ auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get();
+ auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get();
+ auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get();
+ auto output_tensor = tensor_reg->getAclTensor(output_index).get();
- auto input_tensor = tensor_builder->at(input_index).get();
+ auto input_tensor = tensor_reg->getAclTensor(input_index).get();
- auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get();
+ auto input_to_forget_weights_tensor =
+ tensor_reg->getAclTensor(input_to_forget_weights_index).get();
+ auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get();
+ auto input_to_output_weights_tensor =
+ tensor_reg->getAclTensor(input_to_output_weights_index).get();
auto recurrent_to_forget_weights_tensor =
- tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get();
+ auto recurrent_to_cell_weights_tensor =
+ tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get();
auto recurrent_to_output_weights_tensor =
- tensor_builder->at(recurrent_to_output_weights_index).get();
+ tensor_reg->getAclTensor(recurrent_to_output_weights_index).get();
- auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get();
+ auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get();
+ auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get();
+ auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get();
+ auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get();
+ auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get();
- auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = std::make_unique<T_ACLLayer>();
+ auto act_info = asActivationLayerInfo(activation);
::arm_compute::LSTMParams<T_Tensor> lstm_params{};
if (has_cifg_param)
{
auto input_to_input_weights_tensor =
- tensor_builder->at(input_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional
auto recurrent_to_input_weights_tensor =
- tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
+ tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); // optional
auto cell_to_input_weights_handle =
- has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle()
+ has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle()
: nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional
+ auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional
lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(),
recurrent_to_input_weights_tensor->handle(),
cell_to_input_weights_handle, input_gate_bias_tensor->handle());
@@ -161,40 +183,42 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands,
if (has_peephole_param)
{
auto cell_to_forget_weights_tensor =
- tensor_builder->at(cell_to_forget_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional
auto cell_to_output_weights_tensor =
- tensor_builder->at(cell_to_output_weights_index).get(); // optional
+ tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional
lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(),
cell_to_output_weights_tensor->handle());
}
if (has_projection_param)
{
- auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
+ auto projection_weights_tensor =
+ tensor_reg->getAclTensor(projection_weights_index).get(); // optional
+ auto projection_bias_handle =
+ has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle()
+ : nullptr; // optional
lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle);
}
- fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(),
- input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
- recurrent_to_forget_weights_tensor->handle(),
- recurrent_to_cell_weights_tensor->handle(),
- recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
- cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
- output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
- scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
- cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info,
- cell_clip, projection_clip);
+ auto fn = generateLayer<T_ACLLayer>(
+ input_tensor->handle(), input_to_forget_weights_tensor->handle(),
+ input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(),
+ recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(),
+ recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(),
+ cell_bias_tensor->handle(), output_gate_bias_tensor->handle(),
+ output_state_in_tensor->handle(), cell_state_in_tensor->handle(),
+ scratch_buffer_tensor->handle(), output_state_out_tensor->handle(),
+ cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip,
+ projection_clip);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer,
- typename T_TensorBuilder>
+ typename T_TensorBuilder, typename T_TensorRegistry>
std::unique_ptr<exec::IFunction>
kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout)
+ const std::shared_ptr<T_TensorBuilder> &tensor_builder,
+ const std::shared_ptr<T_TensorRegistry> &tensor_reg, ir::Layout layout)
{
using ir::operation::FullyConnected;
@@ -236,16 +260,13 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
reshape.dim(1) = input_size; /* W */
}
- auto output_tensor = tensor_builder->at(output_index).get();
- const auto input_tensor = tensor_builder->at(input_index).get();
- const auto weight_tensor = tensor_builder->at(weight_index).get();
- const auto bias_tensor = tensor_builder->at(bias_index).get();
+ auto output_tensor = tensor_reg->getAclTensor(output_index).get();
+ const auto input_tensor = tensor_reg->getAclTensor(input_index).get();
+ const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get();
+ const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get();
const auto frontend_layout = layout;
const auto acl_layout = output_tensor->handle()->info()->data_layout();
- auto fn =
- std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL;
if (operands.at(weight_index).isConstant())
{
@@ -253,20 +274,18 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope
assert(operands.at(weight_index).data());
}
- fn->configure(
- input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(),
- output_tensor->handle(), needs_reshape,
- ::onert::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
+ auto fn = generateLayer<T_ACLLayer>(
+ tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape,
+ asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type);
return std::make_unique<T_FunctionWrapper>(std::move(fn));
}
-template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder>
+template <typename T_ACLLayer, typename T_PoolOp, typename T_AclTensorRegistry>
std::unique_ptr<::arm_compute::IFunction>
kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
- const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout,
+ const std::shared_ptr<T_AclTensorRegistry> &tensor_reg, ir::Layout layout,
::arm_compute::PoolingType pooling_type)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -294,16 +313,14 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands,
VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl;
VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl;
- auto ofm_tensor = tensor_builder->at(ofm_index).get();
- auto ifm_tensor = tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get();
::arm_compute::PoolingLayerInfo info{
pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(),
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = std::make_unique<T_ACLLayer>();
+ asPadStrideInfo(padding, stride), true /* exclude_padding */};
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info);
+ auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info);
return fn;
}
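A note for readers of the call sites above: the generateLayer helper they now use is not shown in this hunk, so the following is only a hedged sketch inferred from usage. The pattern is a small factory that constructs the ACL layer and forwards every argument to configure(); a second overload (used by the Conv2D and FullyConnected call sites) presumably passes the internal buffer manager to the layer's constructor before configuring.

// Hedged sketch only -- the real generateLayer is defined elsewhere in AclKernelGen.h.
#include <memory>
#include <utility>

template <typename T_Layer, typename... Args>
std::unique_ptr<T_Layer> generateLayer(Args &&... args)
{
  auto l = std::make_unique<T_Layer>();       // default-construct the ACL layer
  l->configure(std::forward<Args>(args)...);  // forward all arguments to configure()
  return l;                                   // callers wrap the result via asAclFunction()
}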
diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h
index 6b03fdf7f..91452014b 100644
--- a/runtime/onert/backend/acl_common/AclTensorBuilder.h
+++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h
@@ -25,6 +25,7 @@
#include "ir/OperandIndexMap.h"
#include <ir/Operands.h>
#include "AclTensorManager.h"
+#include "AclTensorRegistry.h"
#include <memory>
#include "ParentInfo.h"
#include <util/Utils.h>
@@ -48,7 +49,8 @@ class AclTensorBuilder : public ITensorBuilder
public:
using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
- AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
+ AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
+ const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg);
/**
* @brief Register tensor information to allocate on ACL-CL backend
@@ -63,19 +65,13 @@ public:
void notifyLastUse(const ir::OperandIndex &) override;
bool isRegistered(const ir::OperandIndex &) const override;
- std::shared_ptr<backend::ITensorRegistry> tensorRegistry() override { return nullptr; }
void prepare(void) override;
void allocate() override;
void postFunctionPrepare() override;
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
- std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
-
T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); }
void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
@@ -100,8 +96,6 @@ public:
*/
bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
- bool supportDynamicTensor() override { return false; }
-
private:
void buildTensors(void);
ir::OperandIndex findRootParent(ir::OperandIndex index);
@@ -113,6 +107,7 @@ private:
ir::OperandIndexMap<size_t> _uses_count_map;
std::unique_ptr<T_AclTensorManager> _tensor_mgr;
+ std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg;
// for linear executor
std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
@@ -140,9 +135,10 @@ namespace acl_common
{
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands,
- T_AclTensorManager *tensor_mgr)
- : _operands{operands}, _tensor_mgr{tensor_mgr}
+AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(
+ const ir::Operands &operands, T_AclTensorManager *tensor_mgr,
+ const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg)
+ : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg}
{
assert(_tensor_mgr);
}
@@ -310,28 +306,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(voi
}
template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<ITensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_mgr->at(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn)
-{
- _tensor_mgr->iterate(fn);
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
-std::shared_ptr<T_ITensor>
-AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind)
-{
- auto ret = _tensor_mgr->at(ind);
- assert(ret != nullptr);
- return ret;
-}
-
-template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
std::unique_ptr<ITensorManager>
AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void)
{
diff --git a/runtime/onert/backend/acl_common/AclTensorRegistry.h b/runtime/onert/backend/acl_common/AclTensorRegistry.h
new file mode 100644
index 000000000..1ef9f4b35
--- /dev/null
+++ b/runtime/onert/backend/acl_common/AclTensorRegistry.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
+
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+/**
+ * @brief Tensor registry class for acl backends
+ *
+ * This is implemented as a wrapper of AclTensorManager.
+ */
+template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry
+{
+public:
+ AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
+
+ std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override
+ {
+ return _tensor_mgr->at(ind);
+ }
+
+ std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override
+ {
+ return getITensor(ind);
+ }
+
+ auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); }
+
+private:
+ T_AclTensorManager *_tensor_mgr;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__
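The registry is deliberately thin: getITensor and getNativeITensor satisfy the generic ITensorRegistry interface, while getAclTensor hands kernel generators the backend-specific tensor type. A minimal wiring sketch, mirroring the acl_neon Backend hunk later in this diff (illustrative only; names follow that hunk):

// Illustrative wiring, following the acl_neon Backend changes shown below.
auto tm = createTensorManager(is_linear_executor);
auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
// Kernel generators and constant initializers then resolve operands via the registry:
auto ofm_tensor = tr->getAclTensor(ofm_index).get();  // replaces tensor_builder->at(ofm_index)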
diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc
index a5bbe1691..67dcc8192 100644
--- a/runtime/onert/backend/acl_common/Convert.cc
+++ b/runtime/onert/backend/acl_common/Convert.cc
@@ -18,6 +18,7 @@
#include "Swizzle.h"
#include "ir/DataType.h"
+#include "ir/operation/ElementwiseActivation.h"
#include <memory>
namespace
@@ -177,6 +178,50 @@ namespace acl_common
}
}
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+ float beta)
+{
+ switch (op_type)
+ {
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ if (beta == 0.f)
+ {
+ if (alpha == ir::operation::ElementwiseActivation::infinity)
+ {
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+ }
+ else
+ {
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha};
+ }
+ }
+ else
+ {
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta};
+ }
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta};
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0.
+      // TODO In the ACL and NNAPI specs, Logistic currently always uses L=1, k=1, x0=0
+      //      (i.e. always sigmoid) regardless of the parameter values.
+      //      If ACL ever supports a non-sigmoid logistic, the param values should be fixed here.
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
+ case ir::operation::ElementwiseActivation::Type::LEAKY_RELU:
+ return ::arm_compute::ActivationLayerInfo{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha};
+ default:
+ throw std::runtime_error{"Not supported, yet"};
+ break;
+ }
+}
+
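To make the RELU branching above concrete, these are the mappings it produces for a few representative (alpha, beta) pairs; the numeric values are illustrative and not taken from this diff:

// Illustrative only: common ReLU variants routed through asActivationLayerInfo().
using EA = ir::operation::ElementwiseActivation;
auto relu  = asActivationLayerInfo(EA::Type::RELU, EA::infinity, 0.f); // -> RELU
auto relu6 = asActivationLayerInfo(EA::Type::RELU, 6.f, 0.f);          // -> BOUNDED_RELU(6)
auto relu1 = asActivationLayerInfo(EA::Type::RELU, 1.f, -1.f);         // -> LU_BOUNDED_RELU(1, -1)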
arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
ir::Layout frontend_layout, ir::Layout backend_layout)
{
@@ -223,11 +268,6 @@ std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunct
return std::make_unique<AclFunction>(std::move(layer));
}
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer)
-{
- return std::make_unique<AclClFunction>(std::move(layer));
-}
-
ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout)
{
switch (data_layout)
@@ -265,6 +305,21 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type)
}
}
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir)
+{
+ switch (pool_type_ir)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ return arm_compute::PoolingType::AVG;
+ case ir::operation::Pool2D::PoolType::L2:
+ return arm_compute::PoolingType::L2;
+ case ir::operation::Pool2D::PoolType::MAX:
+ return arm_compute::PoolingType::MAX;
+ default:
+      throw std::runtime_error("convertPoolType: Unsupported pool type");
+ }
+}
+
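With the per-type pooling visitors removed from the kernel generators (see the acl_neon KernelGenerator hunk later in this diff), a single Pool2D visitor can dispatch through this converter. A hypothetical call site, assuming the unified visitor keeps the shape of the removed MaxPool2D/AvgPool2D ones:

// Hypothetical Pool2D visitor body (not part of this hunk), for illustration only.
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
    node, _ctx, _tensor_reg, _current_op_seq_layout,
    acl_common::convertPoolType(node.param().op_type));
auto ofm_tensor = _tensor_reg->getAclTensor(node.getOutputs().at(0)).get();
_return_fn = std::make_unique<exec::FunctionSequence>(
    asAclFunction(std::move(raw_fn)),
    ActivationBuilder::generate(node.param().activation, ofm_tensor->handle()));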
arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
switch (reduce_type_ir)
diff --git a/runtime/onert/backend/acl_common/Convert.h b/runtime/onert/backend/acl_common/Convert.h
index 9362098a5..380321c07 100644
--- a/runtime/onert/backend/acl_common/Convert.h
+++ b/runtime/onert/backend/acl_common/Convert.h
@@ -25,7 +25,9 @@
#include "ir/Layout.h"
#include "ir/InternalType.h"
#include "ir/Operand.h"
+#include "ir/operation/Pool2D.h"
#include "ir/operation/Reduce.h"
+#include "ir/operation/ElementwiseActivation.h"
#include "ir/Shape.h"
#include "ir/TypeInfo.h"
#include "ir/Coordinates.h"
@@ -59,6 +61,9 @@ namespace acl_common
const ir::Stride &stride);
::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code);
+::arm_compute::ActivationLayerInfo
+asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha,
+ float beta);
arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank,
ir::Layout frontend_layout, ir::Layout backend_layout);
@@ -67,7 +72,6 @@ std::set<uint32_t> asSet(const ir::Operand &operand, int32_t rank, ir::Layout fr
ir::Layout backend_layout);
std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
-std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer);
template <typename T_Function>
std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction> &&fn)
@@ -78,6 +82,7 @@ std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction>
ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout);
ir::DataType asRuntimeDataType(::arm_compute::DataType data_type);
+arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir);
arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir);
} // namespace acl_common
diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h
index a0b145e19..35d6e4e8e 100644
--- a/runtime/onert/backend/acl_neon/Backend.h
+++ b/runtime/onert/backend/acl_neon/Backend.h
@@ -48,10 +48,13 @@ public:
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor));
+ auto tm = createTensorManager(is_linear_executor);
+ auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm);
+ auto tb = std::make_shared<TensorBuilder>(operands, tm, tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb);
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr);
context->tensor_register = nullptr;
context->optimizer = std::make_shared<Optimizer>(context.get());
return context;
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
index 4191b277f..79edb9ded 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc
@@ -24,100 +24,12 @@ namespace acl_neon
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : acl_common::AclConstantInitializer{operands, tensor_reg}
{
// DO NOTHING
}
-void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerCopyInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index)
-{
- assert(node.getInputs().size() > index);
-
- const auto &input_index = node.getInputs().at(index);
- const auto &input_obj = _operands.at(input_index);
- registerPermuteInitializer(input_index, input_obj);
-}
-
-void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE);
- const auto &block_size_obj = _operands.at(block_size_index);
-
- if (block_size_obj.isConstant())
- {
- _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
- assert(model_obj.data());
- const auto &shape = model_obj.shape();
- const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
- assert(model_obj.shape().rank() == 1);
- obj.access([&](ITensor &tensor) {
- for (size_t i = 0; i < shape.num_elements(); ++i)
- {
- const int32_t value = base[shape.num_elements() - i - 1];
- int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
- tensor.calcOffset({static_cast<int32_t>(i)}));
- *into = value;
- }
- });
- };
- }
-}
-
-void ConstantInitializer::visit(const ir::operation::Conv2D &node)
-{
- permuteInputInitialize(node, ir::operation::Conv2D::KERNEL);
- copyInputInitialize(node, ir::operation::Conv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
-{
- permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL);
- copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
-{
- copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT);
- copyInputInitialize(node, ir::operation::FullyConnected::BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::LSTM &node)
-{
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS);
- copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS);
-}
-
-void ConstantInitializer::visit(const ir::operation::RNN &node)
-{
- copyInputInitialize(node, ir::operation::RNN::WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS);
- copyInputInitialize(node, ir::operation::RNN::BIAS);
-}
-
void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
{
const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE);
@@ -173,11 +85,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
}
}
-void ConstantInitializer::visit(const ir::operation::TransposeConv &node)
-{
- permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL);
-}
-
} // namespace acl_neon
} // namespace backend
} // namespace onert
diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h
index 6b4c1f145..c7d71cdcf 100644
--- a/runtime/onert/backend/acl_neon/ConstantInitializer.h
+++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h
@@ -17,9 +17,7 @@
#ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__
-#include <backend/IConstantInitializer.h>
-#include <ir/Operands.h>
-#include "TensorBuilder.h"
+#include "AclConstantInitializer.h"
namespace onert
{
@@ -28,29 +26,15 @@ namespace backend
namespace acl_neon
{
-class ConstantInitializer : public IConstantInitializer
+class ConstantInitializer : public acl_common::AclConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
- void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Conv2D &) override;
- void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::FullyConnected &) override;
- void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::TransposeConv &) override;
-
-private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
- void copyInputInitialize(const ir::Operation &node, uint32_t index);
- void permuteInputInitialize(const ir::Operation &node, uint32_t index);
-
-private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ using acl_common::AclConstantInitializer::visit;
+ void visit(const ir::operation::SpaceToBatchND &node) final;
};
} // namespace acl_neon
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc
index 1195b83cc..6d53c1245 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.cc
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc
@@ -44,11 +44,12 @@ using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
-KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
- const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg)
: _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
- _current_op_seq_layout(ir::Layout::UNKNOWN)
+ _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN)
{
// DO NOTHING
}
@@ -70,26 +71,6 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
}
}
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -97,8 +78,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
auto frontend_layout = _current_op_seq_layout;
auto backend_layout = ifm_tensor->layout();
@@ -111,14 +92,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto fixed_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>();
-
- fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
- arm_compute::ReductionOperation::ARG_IDX_MAX);
+ auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>(
+ ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
+ arm_compute::ReductionOperation::ARG_IDX_MAX);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
@@ -128,50 +106,67 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto block_size_index{
node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
assert(_ctx.at(block_size_index).data());
- auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>();
-
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Cast &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- std::unique_ptr<::arm_compute::IFunction> fn;
- if (ifm_tensor->data_type() == ofm_tensor->data_type())
- {
- auto l = std::make_unique<::arm_compute::NECopy>();
+ const auto activation = node.param().activation;
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn = std::move(l);
- }
- else
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().arithmetic_type)
{
- auto l = std::make_unique<::arm_compute::NECast>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
-
- fn = std::move(l);
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
+ arm_compute::ConvertPolicy::SATURATE);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ {
+      // For scale 1.0, only RoundingPolicy::TO_ZERO is allowed
+ fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
+ arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
+ break;
+ }
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ break;
+ }
+ default:
+ assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations");
+ break;
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
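The FunctionSequence above is the backend's recurring pattern for fused activations: the arithmetic layer runs first, then ActivationBuilder::generate appends an activation applied in place on the output handle (expected to be a no-op when the activation is NONE). A hedged sketch of what that generate call amounts to, assuming it wraps NEActivationLayer; exec::NopFunction is used here as an assumed no-op wrapper:

// Hedged sketch; the real AclActivationBuilder lives in acl_common and may differ in detail.
std::unique_ptr<exec::IFunction> generate(ir::Activation act, ::arm_compute::ITensor *ofm)
{
  if (act == ir::Activation::NONE)
    return std::make_unique<exec::NopFunction>();  // assumed no-op wrapper
  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
  fn->configure(ofm, nullptr, acl_common::asActivationLayerInfo(act));  // in-place on ofm
  return asAclFunction(std::move(fn));
}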
void KernelGenerator::visit(const ir::operation::Conv2D &node)
@@ -195,20 +190,18 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
ker_width, ker_height);
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
- auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
- ::arm_compute::Size2D(1U, 1U), act_info);
+ auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(),
+ ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info,
+ ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -221,16 +214,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
auto block_size = node.param().block_size;
assert(block_size > 0);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>(
+ input_tensor->handle(), output_tensor->handle(), block_size);
- fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
@@ -255,67 +245,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
{
- auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
- ofm_tensor->handle(), conv_info, multiplier, act_info);
+ auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(),
+ conv_info, multiplier, act_info);
_return_fn = asAclFunction(std::move(fn));
}
}
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -336,80 +282,223 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
return;
}
- auto output_tensor = _tensor_builder->at(ofm_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get();
std::vector<::arm_compute::ITensor *> input_tensors;
for (const auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
+ input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle());
std::unique_ptr<::arm_compute::IFunction> fn;
if (input_indexes.size() < 2)
{
- auto l = std::make_unique<::arm_compute::NECopy>();
- l->configure(input_tensors.at(0), output_tensor->handle());
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0),
+ output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = output_tensor->layout();
const auto fixed_axis =
acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_tensor->handle(), fixed_axis);
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>(
+ input_tensors, output_tensor->handle(), fixed_axis);
}
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
+
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+
+ const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo(
+ node.param().op_type, node.param().alpha, node.param().beta);
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
+ std::unique_ptr<arm_compute::IFunction> fn;
+ if (node.param().op_type == ir::operation::ElementwiseActivation::Type::LOGISTIC)
+ {
+    // NOTE NEActivationLayer can produce erroneous results; this is caused by 'vexpq_f32()'.
+    // The NEON function returns 'NaN' instead of 'INF' for values outside the representable
+    // float range, and the 'NaN' then propagates errors into the result of this op.
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), act_info);
+ }
+ else
+ {
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(),
+ ofm_tensor->handle(), act_info);
+ }
- auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();
+ _return_fn = asAclFunction(std::move(fn));
+}
- fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto acl_fn = asAclFunction(std::move(fn));
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- _return_fn = std::move(acl_fn);
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND:
+ {
+ fn = acl_common::generateLayer<arm_compute::NELogicalAnd>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ {
+ fn = acl_common::generateLayer<arm_compute::NELogicalOr>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>(
+ lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle());
+ break;
+ }
+ default:
+ {
+ std::string err_msg("acl_neon KernelGenerator : " + node.name() +
+                          " is not an elementwise-binary operation");
+ assert(false && err_msg.c_str());
+ break;
+ }
+ }
+ _return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Floor &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+
+ std::unique_ptr<arm_compute::IFunction> fn;
+ switch (node.param().op_type)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ {
+ if (input_tensor->data_type() == output_tensor->data_type())
+ {
+ fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(),
+ output_tensor->handle());
+ }
+ else
+ {
+ fn = acl_common::generateLayer<arm_compute::NECast>(
+ input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
+ }
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::DEQUANTIZE:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::FLOOR:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ {
+ fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ {
+ fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ {
+ fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ break;
+ }
+ case ir::operation::ElementwiseUnary::Type::SQRT:
+ {
+ const ::arm_compute::ActivationLayerInfo act_info{
+ ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
- auto fn = std::make_unique<::arm_compute::NEFloor>();
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(
+ input_tensor->handle(), output_tensor->handle(), act_info);
+ break;
+ }
+ default:
+ {
+ throw std::runtime_error("acl_neon KernelGenerator : " + node.name() +
+                               " is not supported yet");
+ break;
+ }
+ }
+ _return_fn = asAclFunction(std::move(fn));
+}
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
+void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
+ const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
- auto acl_fn = asAclFunction(std::move(fn));
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- _return_fn = std::move(acl_fn);
+ auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>(
+ values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
+
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
const auto output_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->at(output_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
const auto activation = node.param().activation;
auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
::arm_compute::NEFullyConnectedReshapingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout);
+ node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout);
_return_fn = std::make_unique<exec::FunctionSequence>(
std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
@@ -423,21 +512,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hits_tensor = _tensor_builder->at(hits_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get();
- auto lookups_tensor = _tensor_builder->at(lookups_index).get();
- auto keys_tensor = _tensor_builder->at(keys_index).get();
- auto values_tensor = _tensor_builder->at(values_index).get();
+ auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get();
+ auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get();
+ auto values_tensor = _tensor_reg->getAclTensor(values_index).get();
- auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();
+ auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>(
+ lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
+ output_tensor->handle(), hits_tensor->handle());
- fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
- output_tensor->handle(), hits_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Gather &node)
@@ -453,9 +539,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
// Converting in reverse order
const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto indices_tensor = _tensor_builder->at(indices_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get();
const auto backend_layout = ofm_tensor->layout();
UNUSED_RELEASE(backend_layout);
@@ -471,8 +557,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
assert(backend_layout == indices_tensor->layout());
assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
- auto fn = std::make_unique<::arm_compute::NEGatherEx>();
-
// input is n-D, indices k-D, output is (n + k - 1)-D
size_t n = ifm_rank;
assert(n == ifm_tensor->num_dimensions());
@@ -495,15 +579,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
}
- fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
+ auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>(
+ ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
  // acl_neon does not revert the disabled dim_correction because acl_neon's kernels would
  // use arm_compute::TensorInfo::offset_element_in_bytes(), which would cause an error
  // when a kernel accesses a high dimension whose value is 1
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
@@ -513,17 +596,16 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto gamma_tensor = _tensor_builder->at(gamma_index).get();
- auto beta_tensor = _tensor_builder->at(beta_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get();
+ auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get();
auto epsilon = node.param().epsilon;
auto activation = node.param().activation;
- auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
- beta_tensor->handle(), epsilon);
+ auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>(
+ ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(),
+ epsilon);
_return_fn = std::make_unique<exec::FunctionSequence>(
asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
@@ -548,32 +630,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
float bias = 0.0f; // Don't offset the reduction.
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
radius, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
- node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
-
- const auto ofm_index{node.getOutputs().at(0)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- const auto activation = node.param().activation;
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(raw_fn)),
- ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
@@ -587,142 +653,22 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod
auto beta = node.param().beta;
auto bias = node.param().bias;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
const auto norm_info = ::arm_compute::NormalizationLayerInfo(
::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
- auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
+ auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NELogicalAnd>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEBitwiseNot>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NELogicalOr>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- // NOTE NEActivationLayer can generate produce erroneous results. it were caused by 'vexpq_f32()'.
- // The neon function returns a value outside of the limit of representation in float as 'NaN'
- // instead of 'INF', and then the result of this op will be errors due to the 'NaN'.
- auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
_return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
- ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();
-
- // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NENegLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg);
}
void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -736,25 +682,23 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
for (const auto &input_index : node.getInputs())
input_indexes.emplace_back(input_index);
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
std::vector<arm_compute::ITensor *> inputs;
for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+ inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();
if (axis < 0)
axis += output_rank;
axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEStackLayer>();
-
// Disable applied dim_correction
for (const auto &input_index : input_indexes)
{
size_t input_rank = _ctx.at(input_index).shape().rank();
- const auto &input_tensor = _tensor_builder->at(input_index);
+ const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
assert(input_rank == input_tensor->num_dimensions());
if (input_rank != input_tensor->info()->num_dimensions())
{
@@ -764,7 +708,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
}
}
- fn->configure(inputs, axis, output);
+ auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);
// acl_neon doesn't revert disabling applied dim_correction because acl_neon's kernels would
// use arm_compute::TensorInfo::offset_element_in_bytes()
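// A minimal sketch of the acl_common::generateLayer helper used above, assuming it simply
// wraps the former make_unique + configure pair (its exact definition in acl_common is not
// shown in this diff, so the signature below is an assumption):
//
//   template <typename Layer, typename... Args>
//   std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args)
//   {
//     auto l = std::make_unique<Layer>();         // create the ACL layer
//     l->configure(std::forward<Args>(args)...);  // forward all arguments to configure()
//     return l;                                   // hand back the common IFunction interface
//   }
//
// Calls that pass an internal buffer manager first (e.g. NERNNLayer and NESoftmaxLayer below)
// presumably use an overload that constructs the layer with that memory manager and then
// forwards the remaining arguments to configure().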
@@ -783,8 +727,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
auto rank = _ctx.at(input_index).shape().rank();
auto pad_base = _ctx.at(pad_index).data()->base();
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+ auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
::arm_compute::PaddingList padding_list;
padding_list.resize(rank);
@@ -793,7 +737,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
const auto axis =
acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
@@ -807,19 +751,33 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const auto pixel_value =
::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
- auto fn = std::make_unique<::arm_compute::NEPadLayer>();
- fn->configure(input, output, padding_list, pixel_value);
+ auto fn =
+ acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);
_return_fn = asAclFunction(std::move(fn));
}
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+ auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+ node, _ctx, _tensor_reg, _current_op_seq_layout,
+ acl_common::convertPoolType(node.param().op_type));
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ const auto activation = node.param().activation;
+ _return_fn = std::make_unique<exec::FunctionSequence>(
+ asAclFunction(std::move(raw_fn)),
+ ActivationBuilder::generate(activation, ofm_tensor->handle()));
+}
+
void KernelGenerator::visit(const ir::operation::Permute &node)
{
const auto ofm_idx{node.getOutputs().at(0)};
const auto ifm_idx{node.getInputs().at(0)};
const auto permute_type = node.getPermuteType();
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto rank = _ctx.at(ofm_idx).shape().rank();
assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -830,35 +788,22 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
// WHCN -> CWHN
pv = arm_compute::PermutationVector{2, 0, 1};
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
{
// CWHN -> WHCN
pv = arm_compute::PermutationVector{1, 2, 0};
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), pv);
}
else
{
- auto l = std::make_unique<::arm_compute::NECopy>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle());
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::PReLU &node)
@@ -867,21 +812,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node)
const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto alpha_tensor = _tensor_builder->at(alpha_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();
- std::unique_ptr<::arm_compute::IFunction> fn;
-
- auto l = std::make_unique<::arm_compute::NEPReluLayer>();
-
- l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
+ ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
@@ -890,8 +828,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// Convert to ACL axes taking into account negative values and possible duplicates.
const auto &axes = _ctx.at(axes_index);
@@ -906,93 +844,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
std::unique_ptr<::arm_compute::IFunction> fn;
if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
- auto l = std::make_unique<::arm_compute::NEReduceMean>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceMean>(input_tensor->handle(), reduce_axes,
+ keep_dims, output_tensor->handle());
}
else if (reduce_type == ir::operation::Reduce::ReduceType::SUM)
{
- auto l = std::make_unique<::arm_compute::NEReduceSum>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceSum>(input_tensor->handle(), reduce_axes,
+ keep_dims, output_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEReduceOperation>();
-
- l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
- acl_common::convertReduceType(reduce_type));
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
+ input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
+ acl_common::convertReduceType(reduce_type));
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -1000,8 +866,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
// NOTE This operation must not change the layout from frontend to backend
// So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -1012,13 +878,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
UNUSED_RELEASE(frontend_layout);
UNUSED_RELEASE(backend_layout);
- auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -1027,18 +890,15 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEScale>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+ auto fn = acl_common::generateLayer<arm_compute::NEScale>(
+ ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+ ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+ ::arm_compute::SamplingPolicy::TOP_LEFT);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1056,40 +916,24 @@ void KernelGenerator::visit(const ir::operation::RNN &node)
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto weights_tensor = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_tensor = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+ auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+ auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+ auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();
auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
- auto copy_layer = std::make_unique<::arm_compute::NECopy>();
- copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+ auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
+ hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
_return_fn = asAclFunction(std::move(copy_layer));
- auto fn = std::make_unique<::arm_compute::NERNNLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_tensor->handle(), weights_tensor->handle(),
- recurrent_weights_tensor->handle(), bias_tensor->handle(),
- hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
- _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
-
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
+ auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+ hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
_return_fn = asAclFunction(std::move(fn));
}
@@ -1105,32 +949,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
(void)dims;
(void)ndim;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
- auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_tensor->handle(), output_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -1139,8 +962,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = input_tensor->layout();
@@ -1154,14 +977,11 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
}
- auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+ auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+ output_tensor->handle(), beta);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1172,22 +992,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto block_size_tensor = _tensor_builder->at(block_size_index).get();
- auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+ auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();
assert(_ctx.at(block_size_index).data());
assert(_ctx.at(paddings_index).data());
- auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
-
- fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
- ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
+ ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+ ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1197,16 +1014,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
auto block_size = node.param().block_size;
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
- auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
- fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+ auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
+ ifm_tensor->handle(), ofm_tensor->handle(), block_size);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1221,10 +1035,10 @@ void KernelGenerator::visit(const ir::operation::Split &node)
for (const auto &output : node.getOutputs())
output_indexes.emplace_back(output);
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
std::vector<arm_compute::ITensor *> output_tensors;
for (const auto &ofm_ind : output_indexes)
- output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+ output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -1233,71 +1047,26 @@ void KernelGenerator::visit(const ir::operation::Split &node)
axis += ifm_rank;
axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NESplit>();
-
- fn->configure(ifm_tensor->handle(), output_tensors, axis);
+ auto fn =
+ acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
+ lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -1307,8 +1076,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -1358,13 +1127,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
ends_set.set(i, ends[i]);
}
- auto fn = std::make_unique<::arm_compute::NESlice>();
-
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+ auto fn = acl_common::generateLayer<arm_compute::NESlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -1375,8 +1141,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto outputData_tensor = _tensor_builder->at(output_index).get();
- auto inputData_tensor = _tensor_builder->at(input_index).get();
+ auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = inputData_tensor->layout();
@@ -1445,14 +1211,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
strides_set.set(i, strides[i]);
}
- auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
+ auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
+ inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+ begin_mask, end_mask, shrink_axis_mask);
- fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1481,20 +1244,17 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node)
invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
}
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto ifm_tensor = _tensor_builder->at(ifm_index).get();
- auto ker_tensor = _tensor_builder->at(ker_index).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();
const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
- auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
+ ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+ invalid_horizontal, invalid_vertical);
- fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
- tconv_info, invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -1503,8 +1263,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
const auto &perm{node.param().perm};
- auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
- const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+ auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+ const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
const auto frontend_layout = _current_op_seq_layout;
const auto backend_layout = ifm_tensor->layout();
@@ -1514,27 +1274,17 @@ void KernelGenerator::visit(const ir::operation::Transpose &node)
rank, pv, frontend_layout, backend_layout);
std::unique_ptr<::arm_compute::IFunction> fn;
-
if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
{
- auto l = std::make_unique<::arm_compute::NETranspose>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
+ ofm_tensor->handle());
}
else
{
- auto l = std::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
- fn = std::move(l);
+ fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+ ofm_tensor->handle(), backend_pv);
}
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1548,25 +1298,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
for (const auto &output_index : node.getOutputs())
output_indexes.emplace_back(output_index);
- auto input = _tensor_builder->at(input_index).get()->handle();
+ auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
std::vector<arm_compute::ITensor *> outputs;
for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+ outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());
const auto frontend_layout = _current_op_seq_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+ const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
if (axis < 0)
axis += input_rank;
axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
- auto fn = std::make_unique<::arm_compute::NEUnstack>();
-
// Disable applied dim_correction
std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
for (const auto &output_index : output_indexes)
{
size_t output_rank = _ctx.at(output_index).shape().rank();
- const auto &output_tensor = _tensor_builder->at(output_index);
+ const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
assert(output_rank == output_tensor->num_dimensions());
if (output_rank != output_tensor->info()->num_dimensions())
@@ -1577,84 +1325,23 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
}
}
- fn->configure(input, outputs, axis);
+ auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);
_return_fn = asAclFunction(std::move(fn));
}
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- _return_fn = std::make_unique<exec::FunctionSequence>(
- asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEExpLayer>();
-
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input_tensor = _tensor_builder->at(input_index).get();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
- auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
+ auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+ output_tensor->handle());
- fn->configure(input_tensor->handle(), output_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -1665,56 +1352,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto comparison_type = node.param().comparison_type;
- auto output_tensor = _tensor_builder->at(output_index).get();
- auto input0_tensor = _tensor_builder->at(input0_index).get();
- auto input1_tensor = _tensor_builder->at(input1_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
-
- fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
- (arm_compute::ComparisonOperation)comparison_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
+ auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+ auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+ auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+ auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
+ input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+ (arm_compute::ComparisonOperation)comparison_type);
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->at(ofm_index).get();
- auto lhs_tensor = _tensor_builder->at(lhs_index).get();
- auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
- auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
-
- fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _return_fn = std::move(acl_fn);
+ _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::OneHot &node)
@@ -1726,17 +1372,16 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->at(out_idx).get();
- auto indices_tensor = _tensor_builder->at(indices_idx).get();
- auto depth_tensor = _tensor_builder->at(depth_idx).get();
- auto onvalue_tensor = _tensor_builder->at(onvalue_idx).get();
- auto offvalue_tensor = _tensor_builder->at(offvalue_idx).get();
-
- auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
- fn->configure(indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
- offvalue_tensor->handle(), output_tensor->handle(), axis);
- auto acl_fn = asAclFunction(std::move(fn));
- _return_fn = std::move(acl_fn);
+ auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
+ auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
+ auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
+ auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
+ auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+
+ auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
+ indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+ offvalue_tensor->handle(), output_tensor->handle(), axis);
+ _return_fn = asAclFunction(std::move(fn));
}
} // namespace acl_neon
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
index d6f7932b7..4d269cde5 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.h
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -21,6 +21,8 @@
#include "ir/Operands.h"
#include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"
namespace onert
{
@@ -33,75 +35,57 @@ class KernelGenerator : public IKernelGenerator
{
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);
void visit(const ir::OpSequence &) override;
- void visit(const ir::operation::Abs &) override;
void visit(const ir::operation::ArgMax &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
- void visit(const ir::operation::Cast &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthToSpace &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::Dequantize &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::EmbeddingLookup &) override;
- void visit(const ir::operation::Floor &) override;
void visit(const ir::operation::FullyConnected &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::HashtableLookup &) override;
void visit(const ir::operation::InstanceNorm &) override;
void visit(const ir::operation::L2Normalization &) override;
- void visit(const ir::operation::L2Pool2D &) override;
void visit(const ir::operation::LocalResponseNormalization &) override;
- void visit(const ir::operation::LogicalAnd &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::LogicalOr &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::LSTM &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Pad &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Permute &) override;
void visit(const ir::operation::PReLU &) override;
void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ReLU1 &) override;
- void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Reshape &) override;
void visit(const ir::operation::ResizeBilinear &) override;
void visit(const ir::operation::RNN &) override;
- void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Squeeze &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::SQRT &) override;
void visit(const ir::operation::SquaredDifference &) override;
- void visit(const ir::operation::Sub &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::TransposeConv &) override;
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Unpack &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Div &) override;
- void visit(const ir::operation::Exp &) override;
void visit(const ir::operation::ExpandDims &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Max &) override;
void visit(const ir::operation::OneHot &) override;
private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
ir::Layout _current_op_seq_layout;
};
diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc
index 2948cab09..ac80901cc 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.cc
+++ b/runtime/onert/backend/acl_neon/Optimizer.cc
@@ -19,7 +19,7 @@
#include "ParentInfo.h"
#include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
#include <util/logging.h>
#include "AclSubTensorAnalyzer.h"
diff --git a/runtime/onert/backend/acl_neon/TensorManager.h b/runtime/onert/backend/acl_neon/TensorManager.h
index 3ec9efa8f..3b7cfbcfd 100644
--- a/runtime/onert/backend/acl_neon/TensorManager.h
+++ b/runtime/onert/backend/acl_neon/TensorManager.h
@@ -55,7 +55,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
operand::NESubTensor>;
-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
{
if (is_linear_executor)
{
diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index 56bd352e0..fc8574b26 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -47,10 +47,12 @@ public:
const auto &operands = graph.operands();
const auto &operations = graph.operations();
auto context = std::make_unique<BackendContext>(this, &graph);
- auto tb = std::make_shared<TensorBuilder>();
+ auto tr = std::make_shared<cpu_common::TensorRegistry>();
+ auto tb = std::make_shared<TensorBuilder>(tr);
+ context->tensor_registry = tr;
context->tensor_builder = tb;
- context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
- context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+ context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+ context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
context->external_context());
context->tensor_register = nullptr;
context->optimizer = nullptr;
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index f314a8e39..e90b21054 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -31,13 +31,15 @@ class BackendContext : public onert::backend::BackendContext
{
public:
BackendContext(const Backend *backend, const ir::Graph *graph,
+ std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
std::shared_ptr<ITensorRegister> tensor_register = nullptr,
std::shared_ptr<IOptimizer> optimizer = nullptr)
- : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
- kernel_gen, tensor_register, optimizer),
+ : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
+ constant_initializer, kernel_gen, tensor_register,
+ optimizer),
_external_context(new ExternalContext)
{
}
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
index deb27f0fe..6f6eb77bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ b/runtime/onert/backend/cpu/ConstantInitializer.cc
@@ -25,8 +25,8 @@ namespace cpu
{
ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+ const std::shared_ptr<ITensorRegistry> &tensor_reg)
+ : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{
// DO NOTHING
}
diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index de03a693a..c016c83bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -17,7 +17,7 @@
#ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
#define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
-#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
#include <backend/IConstantInitializer.h>
#include <ir/Operands.h>
@@ -33,7 +33,7 @@ class ConstantInitializer : public IConstantInitializer
{
public:
ConstantInitializer(const ir::Operands &operands,
- const std::shared_ptr<TensorBuilder> &tensor_builder);
+ const std::shared_ptr<ITensorRegistry> &tensor_reg);
public:
void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
@@ -50,10 +50,10 @@ public:
void visit(const ir::operation::FullyConnected &) override;
private:
- std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+ std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
private:
- std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<ITensorRegistry> _tensor_reg;
};
} // namespace cpu
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 7939fe894..74b6f0c6b 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -16,49 +16,36 @@
#include "KernelGenerator.h"
-#include "ops/AbsLayer.h"
-#include "ops/AddLayer.h"
#include "ops/ArgMinMaxLayer.h"
-#include "ops/AvgPoolLayer.h"
#include "ops/BatchToSpaceNDLayer.h"
-#include "ops/CastLayer.h"
+#include "ops/BinaryArithmeticLayer.h"
#include "ops/CompareLayer.h"
#include "ops/ConcatLayer.h"
#include "ops/ConvolutionLayer.h"
-#include "ops/CosLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
-#include "ops/DivLayer.h"
#include "ops/EinsumLayer.h"
-#include "ops/ExpLayer.h"
+#include "ops/ElementwiseActivationLayer.h"
+#include "ops/ElementwiseBinaryLayer.h"
+#include "ops/ElementwiseUnaryLayer.h"
#include "ops/ExpandDimsLayer.h"
#include "ops/FillLayer.h"
#include "ops/FullyConnectedLayer.h"
#include "ops/GatherLayer.h"
-#include "ops/LogLayer.h"
-#include "ops/LogisticLayer.h"
-#include "ops/MaxLayer.h"
-#include "ops/MaxPoolLayer.h"
#include "ops/MeanLayer.h"
-#include "ops/MinLayer.h"
-#include "ops/MulLayer.h"
-#include "ops/NegLayer.h"
#include "ops/OneHotLayer.h"
#include "ops/OperationUtils.h"
#include "ops/PackLayer.h"
#include "ops/PadLayer.h"
+#include "ops/PoolLayer.h"
#include "ops/PowLayer.h"
#include "ops/RangeLayer.h"
+#include "ops/RankLayer.h"
#include "ops/ReduceLayer.h"
-#include "ops/ReLULayer.h"
-#include "ops/ReLU6Layer.h"
#include "ops/ReshapeLayer.h"
#include "ops/ResizeBilinearLayer.h"
#include "ops/ReverseLayer.h"
-#include "ops/RoundLayer.h"
-#include "ops/RsqrtLayer.h"
#include "ops/SelectLayer.h"
#include "ops/ShapeLayer.h"
-#include "ops/SinLayer.h"
#include "ops/SliceLayer.h"
#include "ops/SoftMaxLayer.h"
#include "ops/StridedSliceLayer.h"
@@ -66,22 +53,16 @@
#include "ops/SpaceToDepthLayer.h"
#include "ops/SplitLayer.h"
#include "ops/SplitVLayer.h"
-#include "ops/SubLayer.h"
-#include "ops/TanhLayer.h"
#include "ops/TileLayer.h"
#include "ops/TransposeLayer.h"
#include "ops/UnpackLayer.h"
-#include "ops/LogicalNotLayer.h"
-#include "ops/ZerosLikeLayer.h"
#include "ops/SquaredDiffLayer.h"
-#include "ops/LogicalOrLayer.h"
#include "ops/L2NormLayer.h"
#include "ops/MatrixBandPartLayer.h"
#include "ops/BatchMatMulLayer.h"
#include "ops/BroadcastToLayer.h"
#include "ops/FusedBatchNormLayer.h"
#include "ops/LogSoftMaxLayer.h"
-#include "ops/QuantizeLayer.h"
#include "ops/StatelessRandomUniformLayer.h"
#include <backend/Backend.h>
@@ -102,6 +83,104 @@ namespace cpu
namespace
{
+ops::ArithmeticType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+ switch (arithmetic_type_ir)
+ {
+ case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+ return ops::ArithmeticType::kAdd;
+ case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+ return ops::ArithmeticType::kSub;
+ case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+ return ops::ArithmeticType::kMul;
+ case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+ return ops::ArithmeticType::kDiv;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseActivationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+ return ops::ElementwiseActivationType::kLogistic;
+ case ir::operation::ElementwiseActivation::Type::RELU:
+ return ops::ElementwiseActivationType::kReLU;
+ case ir::operation::ElementwiseActivation::Type::TANH:
+ return ops::ElementwiseActivationType::kTanh;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseBinaryType
+convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+ return ops::ElementwiseBinaryType::kLogicalOr;
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+ return ops::ElementwiseBinaryType::kMax;
+ case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+ return ops::ElementwiseBinaryType::kMin;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::ElementwiseUnary::Type::ABS:
+ return ops::ElementwiseUnaryType::kAbs;
+ case ir::operation::ElementwiseUnary::Type::CAST:
+ return ops::ElementwiseUnaryType::kCast;
+ case ir::operation::ElementwiseUnary::Type::COS:
+ return ops::ElementwiseUnaryType::kCos;
+ case ir::operation::ElementwiseUnary::Type::ERF:
+ return ops::ElementwiseUnaryType::kErf;
+ case ir::operation::ElementwiseUnary::Type::EXP:
+ return ops::ElementwiseUnaryType::kExp;
+ case ir::operation::ElementwiseUnary::Type::LOG:
+ return ops::ElementwiseUnaryType::kLog;
+ case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+ return ops::ElementwiseUnaryType::kLogicalNot;
+ case ir::operation::ElementwiseUnary::Type::NEG:
+ return ops::ElementwiseUnaryType::kNeg;
+ case ir::operation::ElementwiseUnary::Type::QUANTIZE:
+ return ops::ElementwiseUnaryType::kQuantize;
+ case ir::operation::ElementwiseUnary::Type::ROUND:
+ return ops::ElementwiseUnaryType::kRound;
+ case ir::operation::ElementwiseUnary::Type::RSQRT:
+ return ops::ElementwiseUnaryType::kRSqrt;
+ case ir::operation::ElementwiseUnary::Type::SIN:
+ return ops::ElementwiseUnaryType::kSin;
+ case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
+ return ops::ElementwiseUnaryType::kZerosLike;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
+ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+ switch (type_ir)
+ {
+ case ir::operation::Pool2D::PoolType::AVG:
+ return ops::PoolType::kAvg;
+ case ir::operation::Pool2D::PoolType::MAX:
+ return ops::PoolType::kMax;
+ default:
+ throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+ }
+}
+
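// A usage sketch for the converters above, assuming the visitors pair them with the new
// fused cpu kernels (the configure() signature shown here is an assumption, not taken from
// this diff):
//
//   void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
//   {
//     const auto output_index{node.getOutputs().at(0)};
//     const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
//     auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
//     auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
//
//     auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
//     fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
//     _return_fn = std::move(fn);
//   }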
ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
switch (reduce_type_ir)
@@ -127,11 +206,12 @@ ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_
KernelGenerator::KernelGenerator(
const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context)
: _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
- _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
- _external_context(external_context)
+ _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+ _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
// DO NOTHING
}
@@ -140,11 +220,9 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
assert(!_return_fn_seq);
assert(_tensor_builder->dynamicTensorManager());
- assert(_tensor_builder->tensorRegistry());
+ assert(_tensor_reg);
- auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
- auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(
- _ctx, dyn_tensor_manager, _tensor_builder->tensorRegistry());
+ auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);
_return_fn_seq = std::make_unique<exec::FunctionSequence>();
@@ -154,7 +232,7 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
dyn_ctx->op_seq = &op_seq;
dyn_ctx->operations = &_operations_ctx;
dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
- dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
+ dyn_ctx->tensor_registry = _tensor_reg;
dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();
_return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
@@ -170,13 +248,13 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
{
- auto portable_tensor = _tensor_builder->portableAt(ind);
+ auto portable_tensor = _tensor_reg->getPortableTensor(ind);
if (portable_tensor)
{
assert(portable_tensor->layout() == ir::Layout::NHWC);
}
- auto tensor = _tensor_builder->at(ind);
+ auto tensor = _tensor_reg->getNativeTensor(ind);
if (tensor)
{
tensor->increase_ref();
@@ -194,21 +272,23 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
- auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
- auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
const auto stride = node.param().stride;
const auto activation = node.param().activation;
const auto param_padding = node.param().padding;
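+ // Forward the dilation factors to ConvolutionLayer::configure() and include them in the padding calculation below.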
+ const auto dilation = node.param().dilation;
auto fn = std::make_unique<ops::ConvolutionLayer>();
if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
{
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
- stride.horizontal, stride.vertical, activation, ofm_tensor);
+ stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+ activation, ofm_tensor);
_return_fn = std::move(fn);
return;
@@ -221,11 +301,12 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
const auto ker_width = ker_shape.dim(2);
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ dilation.width_factor, dilation.height_factor);
fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
- activation, ofm_tensor);
+ dilation.width_factor, dilation.height_factor, activation, ofm_tensor);
_return_fn = std::move(fn);
}
@@ -251,10 +332,10 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
const auto multiplier = node.param().multiplier;
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
- auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
- auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+ auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+ auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();
auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>();
@@ -265,57 +346,6 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
-
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::MaxPoolLayer>();
-
- fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
- const auto padding =
- ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::AvgPoolLayer>();
-
- fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
- stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Concat &node)
{
const auto ofm_index{node.getOutputs().at(0)};
@@ -323,11 +353,11 @@ void KernelGenerator::visit(const ir::operation::Concat &node)
const auto rank = _ctx.at(ofm_index).shape().rank();
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
auto fn = std::make_unique<ops::ConcatLayer>();
@@ -342,9 +372,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)};
const auto block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
- auto block_size_alloc = _tensor_builder->portableAt(block_size_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
+ auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get();
auto fn = std::make_unique<ops::BatchToSpaceNDLayer>();
@@ -354,7 +384,7 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
if (node.getInputs().size() != NNApiInputs)
{
const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)};
- crops_alloc = _tensor_builder->portableAt(crops_data_index).get();
+ crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get();
}
fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc);
@@ -368,9 +398,9 @@ void KernelGenerator::visit(const ir::operation::Fill &node)
const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)};
const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto value_tensor = _tensor_builder->portableAt(value_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto value_tensor = _tensor_reg->getPortableTensor(value_index).get();
auto fn = std::make_unique<ops::FillLayer>();
@@ -389,11 +419,11 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node)
const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto weight_tensor = _tensor_builder->portableAt(weight_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get();
auto bias_tensor =
- bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get();
+ bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index).get();
auto fn = std::make_unique<ops::FullyConnectedLayer>();
@@ -408,8 +438,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
// optional 2nd input
IPortableTensor *shape_tensor = nullptr;
@@ -417,7 +447,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
if (node.getInputs().size() == 2)
{
const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
- shape_tensor = _tensor_builder->portableAt(shape_index).get();
+ shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
}
auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -431,8 +461,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
// Squeeze can share same kernel with reshape
auto fn = std::make_unique<ops::ReshapeLayer>();
@@ -449,8 +479,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
const auto beta = node.param().beta;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::SoftMaxLayer>();
@@ -459,21 +489,22 @@ void KernelGenerator::visit(const ir::operation::Softmax &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Add &node)
+void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+ const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)};
const auto activation = node.param().activation;
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::AddLayer>();
+ auto fn = std::make_unique<ops::BinaryArithmeticLayer>();
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation,
+ convertArithmeticType(node.param().arithmetic_type));
_return_fn = std::move(fn);
}
@@ -484,9 +515,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node)
const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
auto comparison_type = node.param().comparison_type;
@@ -503,9 +534,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
const auto backend_layout = output_tensor->layout();
UNUSED_RELEASE(backend_layout);
@@ -534,46 +565,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::SubLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MulLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::OneHot &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -584,11 +575,11 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto indices_tensor = _tensor_builder->portableAt(indices_index).get();
- auto depth_tensor = _tensor_builder->portableAt(depth_index).get();
- auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get();
- auto offvalue_tensor = _tensor_builder->portableAt(offvalue_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get();
+ auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get();
+ auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get();
+ auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get();
assert(indices_tensor->data_type() == OperandType::INT32);
assert(axis <= static_cast<int>(indices_tensor->num_dimensions()));
@@ -600,34 +591,14 @@ void KernelGenerator::visit(const ir::operation::OneHot &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- // The same as Add
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::DivLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Einsum &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
const auto equation = node.param().equation;
@@ -648,7 +619,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
const auto &operand = _ctx.at(idx);
// TODO make sure using `_current_op_seq_layout` is correct for custom operations
types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()});
- auto in_tensor = _tensor_builder->portableAt(idx);
+ auto in_tensor = _tensor_reg->getPortableTensor(idx);
tensors.emplace_back(in_tensor);
}
};
@@ -666,64 +637,68 @@ void KernelGenerator::visit(const ir::operation::Custom &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Exp &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto fn = std::make_unique<ops::ExpLayer>();
+ auto fn = std::make_unique<ops::ElementwiseActivationLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta,
+ convertElementwiseActivationType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ExpandDims &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
- const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
+ const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::ExpandDimsLayer>();
+ auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
- fn->configure(input_tensor, axis_tensor, output_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, output_tensor,
+ convertElementwiseBinaryType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Logistic &node)
+void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
- auto fn = std::make_unique<ops::LogisticLayer>();
+ auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Tanh &node)
+void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
+ const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
+ const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
- auto fn = std::make_unique<ops::TanhLayer>();
+ auto fn = std::make_unique<ops::ExpandDimsLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(input_tensor, axis_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -737,11 +712,11 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
assert(-rank <= axis && axis < rank);
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
auto fn = std::make_unique<ops::PackLayer>();
@@ -759,11 +734,11 @@ void KernelGenerator::visit(const ir::operation::Unpack &node)
assert(rank == 0 || (-rank <= axis && axis < rank));
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
std::vector<IPortableTensor *> output_tensors;
for (auto &output_idx : node.getOutputs())
- output_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::UnpackLayer>();
@@ -781,8 +756,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
const auto output_index{node.getOutputs().at(0)};
assert(_ctx.at(pad_index).data());
- auto input = _tensor_builder->portableAt(input_index).get();
- auto output = _tensor_builder->portableAt(output_index).get();
+ auto input = _tensor_reg->getPortableTensor(input_index).get();
+ auto output = _tensor_reg->getPortableTensor(output_index).get();
auto pad_rank = _ctx.at(pad_index).shape().dim(0);
auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
@@ -801,62 +776,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MaxLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::MinLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Cast &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::CastLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Transpose &node)
{
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::TransposeLayer>();
@@ -872,9 +798,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
const auto keep_dims = node.param().keep_dims;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axes_tensor = _tensor_builder->portableAt(axes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get();
if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN)
{
@@ -895,36 +821,6 @@ void KernelGenerator::visit(const ir::operation::Reduce &node)
}
}
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::ReLULayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(0)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::ReLU6Layer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Select &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -932,10 +828,10 @@ void KernelGenerator::visit(const ir::operation::Select &node)
const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)};
const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto condition_tensor = _tensor_builder->portableAt(condition_index).get();
- auto true_tensor = _tensor_builder->portableAt(true_index).get();
- auto false_tensor = _tensor_builder->portableAt(false_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get();
+ auto true_tensor = _tensor_reg->getPortableTensor(true_index).get();
+ auto false_tensor = _tensor_reg->getPortableTensor(false_index).get();
auto fn = std::make_unique<ops::SelectLayer>();
@@ -951,10 +847,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node)
const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto begins_tensor = _tensor_builder->portableAt(begins_index).get();
- auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto begins_tensor = _tensor_reg->getPortableTensor(begins_index).get();
+ auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get();
auto fn = std::make_unique<ops::SliceLayer>();
@@ -971,11 +867,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node)
const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto starts_tensor = _tensor_builder->portableAt(starts_index).get();
- auto ends_tensor = _tensor_builder->portableAt(ends_index).get();
- auto strides_tensor = _tensor_builder->portableAt(strides_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get();
+ auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get();
+ auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get();
auto begin_mask = node.param().begin_mask;
auto end_mask = node.param().end_mask;
@@ -999,11 +895,11 @@ void KernelGenerator::visit(const ir::operation::Split &node)
const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout);
auto axis_resolved = axis < 0 ? axis + rank : axis;
- auto in_tensor = _tensor_builder->portableAt(input_idx).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
std::vector<IPortableTensor *> out_tensors;
for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::SplitLayer>();
@@ -1012,73 +908,13 @@ void KernelGenerator::visit(const ir::operation::Split &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::AbsLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sin &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::SinLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Cos &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::CosLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::RsqrtLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::Shape &node)
{
const auto ofm_index{node.getOutputs().at(0)};
const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
auto fn = std::make_unique<ops::ShapeLayer>();
@@ -1097,8 +933,8 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
auto align_corners = node.param().align_corners;
auto half_pixel_centers = node.param().half_pixel_centers;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::ResizeBilinearLayer>();
@@ -1114,9 +950,9 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)};
const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto axis_tensor = _tensor_builder->portableAt(axis_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get();
auto fn = std::make_unique<ops::ReverseLayer>();
@@ -1125,21 +961,6 @@ void KernelGenerator::visit(const ir::operation::Reverse &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::NegLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
const auto output_index{node.getOutputs().at(0)};
@@ -1147,8 +968,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::ArgMinMaxLayer>();
@@ -1157,81 +978,45 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Pow &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
-
- auto fn = std::make_unique<ops::PowLayer>();
-
- fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Log &node)
+void KernelGenerator::visit(const ir::operation::Pool2D &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)};
-
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-
- auto fn = std::make_unique<ops::LogLayer>();
-
- fn->configure(ifm_tensor, ofm_tensor);
-
- _return_fn = std::move(fn);
-}
+ const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
-void KernelGenerator::visit(const ir::operation::Round &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)};
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto fn = std::make_unique<ops::RoundLayer>();
+ auto fn = std::make_unique<ops::PoolLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor,
+ convertPoolType(node.param().op_type));
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
+void KernelGenerator::visit(const ir::operation::Pow &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)};
-
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
-
- auto fn = std::make_unique<ops::LogicalNotLayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(0)};
- const auto rhs_index{node.getInputs().at(1)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
- auto fn = std::make_unique<ops::LogicalOrLayer>();
+ auto fn = std::make_unique<ops::PowLayer>();
- fn->configure(lhs_tensor, rhs_tensor, ofm_tensor);
+ fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor);
_return_fn = std::move(fn);
}
@@ -1241,8 +1026,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
const auto output_index{node.getOutputs().at(0)};
const auto input_index{node.getInputs().at(0)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto input_alloc = _tensor_builder->portableAt(input_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_alloc = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::L2NormLayer>();
@@ -1251,35 +1036,36 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::ZerosLike &node)
+void KernelGenerator::visit(const ir::operation::Range &node)
{
const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)};
+ const auto start_index{node.getInputs().at(ir::operation::Range::START)};
+ const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
+ const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto start_tensor = _tensor_reg->getPortableTensor(start_index).get();
+ auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get();
+ auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get();
- auto fn = std::make_unique<ops::ZerosLikeLayer>();
+ auto fn = std::make_unique<ops::RangeLayer>();
- fn->configure(input_tensor, output_tensor);
+ fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Range &node)
+void KernelGenerator::visit(const ir::operation::Rank &node)
{
- const auto output_index{node.getOutputs().at(0)};
- const auto start_index{node.getInputs().at(ir::operation::Range::START)};
- const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)};
- const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)};
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto start_tensor = _tensor_builder->portableAt(start_index).get();
- auto limit_tensor = _tensor_builder->portableAt(limit_index).get();
- auto delta_tensor = _tensor_builder->portableAt(delta_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
- auto fn = std::make_unique<ops::RangeLayer>();
+ auto fn = std::make_unique<ops::RankLayer>();
+
+ fn->configure(ifm_tensor, ofm_tensor);
- fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor);
_return_fn = std::move(fn);
}
@@ -1289,9 +1075,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
- auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
auto fn = std::make_unique<ops::SqDiffLayer>();
@@ -1305,9 +1091,9 @@ void KernelGenerator::visit(const ir::operation::Tile &node)
const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)};
const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get();
auto fn = std::make_unique<ops::TileLayer>();
@@ -1322,10 +1108,10 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)};
const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get();
- auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get();
+ auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get();
auto fn = std::make_unique<ops::MatrixBandPartLayer>();
@@ -1339,9 +1125,9 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)};
const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get();
- auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get();
+ auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get();
const auto adj_x = node.param().adj_x;
const auto adj_y = node.param().adj_y;
@@ -1358,9 +1144,9 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node)
const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)};
const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto shape_tensor = _tensor_builder->portableAt(shape_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get();
auto fn = std::make_unique<ops::BroadcastToLayer>();
@@ -1373,10 +1159,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node)
{
const auto ofm_index{node.getOutputs().at(0)};
- auto output_tensor = _tensor_builder->portableAt(ofm_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
std::vector<const IPortableTensor *> input_tensors;
for (auto &ifm_idx : node.getInputs())
- input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get());
+ input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get());
const auto epsilon = node.param().epsilon;
const auto is_training = node.param().is_training;
@@ -1397,8 +1183,8 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node)
const auto beta = node.param().beta;
const auto axis = node.param().axis;
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
auto fn = std::make_unique<ops::LogSoftMaxLayer>();
@@ -1414,10 +1200,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)};
const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)};
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get();
- auto padding_tensor = _tensor_builder->portableAt(padding_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get();
+ auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get();
auto fn = std::make_unique<ops::SpaceToBatchNDLayer>();
@@ -1426,29 +1212,14 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
_return_fn = std::move(fn);
}
-void KernelGenerator::visit(const ir::operation::Quantize &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)};
- const auto output_index{node.getOutputs().at(0)};
-
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
-
- auto fn = std::make_unique<ops::QuantizeLayer>();
-
- fn->configure(input_tensor, output_tensor);
-
- _return_fn = std::move(fn);
-}
-
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
const auto output_index{node.getOutputs().at(0)};
auto block_size = node.param().block_size;
- auto input_tensor = _tensor_builder->portableAt(input_index).get();
- auto output_tensor = _tensor_builder->portableAt(output_index).get();
+ auto input_tensor = _tensor_reg->getPortableTensor(input_index).get();
+ auto output_tensor = _tensor_reg->getPortableTensor(output_index).get();
auto fn = std::make_unique<ops::SpaceToDepthLayer>();
@@ -1462,9 +1233,9 @@ void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node)
const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)};
const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)};
- auto output_alloc = _tensor_builder->portableAt(output_index).get();
- auto shape_alloc = _tensor_builder->portableAt(shape_index).get();
- auto seed_alloc = _tensor_builder->portableAt(seed_index).get();
+ auto output_alloc = _tensor_reg->getPortableTensor(output_index).get();
+ auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get();
+ auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get();
auto fn = std::make_unique<ops::StatelessRandomUniformLayer>();
@@ -1481,13 +1252,13 @@ void KernelGenerator::visit(const ir::operation::SplitV &node)
const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)};
const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)};
- auto in_tensor = _tensor_builder->portableAt(input_idx).get();
- auto in_size_splits = _tensor_builder->portableAt(size_splits).get();
- auto in_split_dim = _tensor_builder->portableAt(split_dim).get();
+ auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get();
+ auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get();
+ auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get();
std::vector<IPortableTensor *> out_tensors;
for (auto &output_idx : node.getOutputs())
- out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get());
+ out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get());
auto fn = std::make_unique<ops::SplitVLayer>();
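
Note: the KernelGenerator.cc changes above collapse the per-operation visitors (Add/Sub/Mul/Div, MaxPool2D/AvgPool2D, Exp/Abs/Cos/Log/..., Max/Min/LogicalOr) into a few parameterized visitors (BinaryArithmetic, Pool2D, ElementwiseActivation/Binary/Unary), with small convert*() helpers mapping the IR-level enum onto the backend kernel enum. The following is a minimal, self-contained sketch of that dispatch pattern, not onert code; IrUnaryType, CpuUnaryType and the simplified ElementwiseUnaryLayer are illustrative stand-ins only.

#include <cmath>
#include <cstddef>
#include <iostream>
#include <stdexcept>

enum class IrUnaryType { ABS, COS, EXP, NEG };       // stand-in for ir::operation::ElementwiseUnary::Type
enum class CpuUnaryType { kAbs, kCos, kExp, kNeg };  // stand-in for ops::ElementwiseUnaryType

// Same shape as the convert*() helpers added above: an exhaustive switch that
// throws for anything the backend does not support.
CpuUnaryType convertUnaryType(IrUnaryType t)
{
  switch (t)
  {
    case IrUnaryType::ABS: return CpuUnaryType::kAbs;
    case IrUnaryType::COS: return CpuUnaryType::kCos;
    case IrUnaryType::EXP: return CpuUnaryType::kExp;
    case IrUnaryType::NEG: return CpuUnaryType::kNeg;
    default: throw std::runtime_error("unsupported elementwise-unary type");
  }
}

// One kernel class parameterized by the op type, standing in for the single
// ElementwiseUnaryLayer that replaces AbsLayer, CosLayer, ExpLayer, ...
class ElementwiseUnaryLayer
{
public:
  void configure(const float *in, float *out, std::size_t len, CpuUnaryType type)
  {
    _in = in; _out = out; _len = len; _type = type;
  }
  void run() const
  {
    for (std::size_t i = 0; i < _len; ++i)
    {
      switch (_type)
      {
        case CpuUnaryType::kAbs: _out[i] = std::fabs(_in[i]); break;
        case CpuUnaryType::kCos: _out[i] = std::cos(_in[i]); break;
        case CpuUnaryType::kExp: _out[i] = std::exp(_in[i]); break;
        case CpuUnaryType::kNeg: _out[i] = -_in[i]; break;
      }
    }
  }

private:
  const float *_in = nullptr;
  float *_out = nullptr;
  std::size_t _len = 0;
  CpuUnaryType _type = CpuUnaryType::kAbs;
};

int main()
{
  const float in[3] = {-1.0f, 0.0f, 2.0f};
  float out[3] = {};
  ElementwiseUnaryLayer layer;
  // A visitor would pass convertUnaryType(node.param().op_type) here.
  layer.configure(in, out, 3, convertUnaryType(IrUnaryType::ABS));
  layer.run();
  std::cout << out[0] << ' ' << out[1] << ' ' << out[2] << '\n'; // prints: 1 0 2
}
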
diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h
index 40c056a96..786e68ee0 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.h
+++ b/runtime/onert/backend/cpu/KernelGenerator.h
@@ -19,6 +19,7 @@
#include "ExternalContext.h"
#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"
#include "Tensor.h"
#include <backend/CustomKernelBuilder.h>
@@ -38,6 +39,7 @@ class KernelGenerator : public IKernelGenerator
public:
KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
const std::shared_ptr<custom::IKernelBuilder> &kernel_builder,
const std::shared_ptr<ExternalContext> &external_context);
@@ -46,8 +48,6 @@ public:
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
- void visit(const ir::operation::MaxPool2D &) override;
- void visit(const ir::operation::AvgPool2D &) override;
void visit(const ir::operation::Concat &) override;
void visit(const ir::operation::Fill &) override;
void visit(const ir::operation::FullyConnected &) override;
@@ -55,51 +55,35 @@ public:
void visit(const ir::operation::Squeeze &) override;
void visit(const ir::operation::Softmax &) override;
void visit(const ir::operation::Comparison &) override;
- void visit(const ir::operation::Add &) override;
- void visit(const ir::operation::Sub &) override;
- void visit(const ir::operation::Mul &) override;
- void visit(const ir::operation::Div &) override;
+ void visit(const ir::operation::BinaryArithmetic &) override;
void visit(const ir::operation::Einsum &) override;
void visit(const ir::operation::Gather &) override;
void visit(const ir::operation::Custom &node) override;
- void visit(const ir::operation::Exp &) override;
+ void visit(const ir::operation::ElementwiseActivation &) override;
+ void visit(const ir::operation::ElementwiseBinary &) override;
+ void visit(const ir::operation::ElementwiseUnary &) override;
void visit(const ir::operation::ExpandDims &) override;
- void visit(const ir::operation::Logistic &) override;
void visit(const ir::operation::Pad &) override;
- void visit(const ir::operation::Max &) override;
- void visit(const ir::operation::Min &) override;
- void visit(const ir::operation::Tanh &) override;
void visit(const ir::operation::Pack &) override;
void visit(const ir::operation::Unpack &) override;
void visit(const ir::operation::OneHot &) override;
- void visit(const ir::operation::Cast &) override;
void visit(const ir::operation::Transpose &) override;
void visit(const ir::operation::Reduce &) override;
- void visit(const ir::operation::ReLU &) override;
- void visit(const ir::operation::ReLU6 &) override;
void visit(const ir::operation::Select &) override;
void visit(const ir::operation::Slice &) override;
void visit(const ir::operation::StridedSlice &) override;
void visit(const ir::operation::Split &) override;
- void visit(const ir::operation::Abs &) override;
- void visit(const ir::operation::Cos &) override;
- void visit(const ir::operation::Sin &) override;
- void visit(const ir::operation::RSQRT &) override;
void visit(const ir::operation::Shape &) override;
void visit(const ir::operation::ResizeBilinear &node) override;
void visit(const ir::operation::Reverse &) override;
- void visit(const ir::operation::Neg &) override;
void visit(const ir::operation::ArgMax &) override;
- void visit(const ir::operation::Log &) override;
- void visit(const ir::operation::Round &) override;
+ void visit(const ir::operation::Pool2D &) override;
void visit(const ir::operation::Pow &) override;
- void visit(const ir::operation::LogicalNot &) override;
- void visit(const ir::operation::ZerosLike &) override;
void visit(const ir::operation::SquaredDifference &) override;
void visit(const ir::operation::Tile &) override;
- void visit(const ir::operation::LogicalOr &) override;
void visit(const ir::operation::L2Normalization &) override;
void visit(const ir::operation::Range &) override;
+ void visit(const ir::operation::Rank &) override;
void visit(const ir::operation::MatrixBandPart &) override;
void visit(const ir::operation::BatchMatMul &) override;
void visit(const ir::operation::BatchToSpaceND &) override;
@@ -107,7 +91,6 @@ public:
void visit(const ir::operation::FusedBatchNorm &) override;
void visit(const ir::operation::LogSoftmax &) override;
void visit(const ir::operation::SpaceToBatchND &) override;
- void visit(const ir::operation::Quantize &) override;
void visit(const ir::operation::SpaceToDepth &) override;
void visit(const ir::operation::StatelessRandomUniform &) override;
void visit(const ir::operation::SplitV &) override;
@@ -116,6 +99,7 @@ private:
const ir::Operands &_ctx;
const ir::Operations &_operations_ctx;
std::shared_ptr<TensorBuilder> _tensor_builder;
+ std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder;
ir::Layout _current_op_seq_layout;
const std::shared_ptr<ExternalContext> _external_context;
diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc
index ab8ba5756..828d52f7c 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.cc
+++ b/runtime/onert/backend/cpu/TensorBuilder.cc
@@ -27,8 +27,8 @@ namespace backend
namespace cpu
{
-TensorBuilder::TensorBuilder()
- : _tensor_reg{new cpu_common::TensorRegistry()},
+TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg)
+ : _tensor_reg{tensor_reg},
_dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)},
_static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())}
{
@@ -57,7 +57,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
assert(_tensor_info_map.find(ind) != _tensor_info_map.end());
const auto tensor_info = _tensor_info_map.at(ind);
- if (!at(ind)->is_dynamic())
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
{
const auto size = tensor_info.total_size();
_static_tensor_mgr->claimPlan(ind, size);
@@ -66,7 +66,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
{
- if (!at(ind)->is_dynamic())
+ if (!_tensor_reg->getNativeTensor(ind)->is_dynamic())
{
_static_tensor_mgr->releasePlan(ind);
}
@@ -85,29 +85,6 @@ void TensorBuilder::allocate()
// This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation.
}
-std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getITensor(ind);
-}
-
-std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getPortableTensor(ind);
-}
-
-bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor)
-{
- return _tensor_reg->setMigrantTensor(ind, tensor);
-}
-
-void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); }
-
-std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind)
-{
- return _tensor_reg->getNativeTensor(ind);
-}
-
std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void)
{
return std::move(_static_tensor_mgr);
diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h
index 617136514..b6d5f09cc 100644
--- a/runtime/onert/backend/cpu/TensorBuilder.h
+++ b/runtime/onert/backend/cpu/TensorBuilder.h
@@ -38,9 +38,7 @@ namespace cpu
class TensorBuilder : public ITensorBuilder
{
public:
- TensorBuilder();
-
- bool supportDynamicTensor() override { return true; }
+ TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg);
/**
* @brief Register tensor information to allocate on CPU backend
@@ -60,34 +58,12 @@ public:
void allocate() override;
void postFunctionPrepare() override { /* DO NOTHING */}
- /**
- * @brief Get tensor with a specific OperandIndex
- *
- * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise.
- */
- std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override;
-
- void iterate(const IterateFunction &fn) override;
-
std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override;
IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); }
std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override;
- /**
- * @brief Get tensor with a specific OperandIndex.
- * @param ind OperandIndex for the tensor. There must exist a tensor with this ind.
- * If not, program will crash with assert or exception.
- * @return shared_ptr<Tensor>
- */
- std::shared_ptr<Tensor> at(const ir::OperandIndex &ind);
- std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind);
- bool setMigrantTensor(const ir::OperandIndex &ind,
- const std::shared_ptr<IPortableTensor> &tensor) override;
-
- std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; }
-
private:
const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg;
std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr;
diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.cc b/runtime/onert/backend/cpu/ops/AbsLayer.cc
deleted file mode 100644
index 322785aeb..000000000
--- a/runtime/onert/backend/cpu/ops/AbsLayer.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AbsLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-AbsLayer::AbsLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void AbsLayer::absFloat32()
-{
- nnfw::cker::Abs(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void AbsLayer::absQuant8() { throw std::runtime_error{"NYI"}; }
-
-void AbsLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void AbsLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- absFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- absQuant8();
- }
- else
- {
- throw std::runtime_error{"Abs: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.h b/runtime/onert/backend/cpu/ops/AbsLayer.h
deleted file mode 100644
index feb5f35ae..000000000
--- a/runtime/onert/backend/cpu/ops/AbsLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
-
-#include "backend/IPortableTensor.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AbsLayer : public ::onert::exec::IFunction
-{
-public:
- AbsLayer();
-
-public:
- void absFloat32();
-
- void absQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/AddLayer.cc b/runtime/onert/backend/cpu/ops/AddLayer.cc
deleted file mode 100644
index 379215303..000000000
--- a/runtime/onert/backend/cpu/ops/AddLayer.cc
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AddLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void AddLayer::addFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void AddLayer::addInt32()
-{
- int32_t output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-}
-
-void AddLayer::addQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- // Parameters for scaled quantized computation
- op_params.left_shift = 20;
- // Zero-points of input and output tensors
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
- assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
- assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
-
- // Compute normalized scale for _lhs and _rhs values,
- // and represent in 32-bit fixed point
- const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale());
- const double real_lhs_scale = _lhs->data_scale() / norm_max_scale;
- const double real_rhs_scale = _rhs->data_scale() / norm_max_scale;
- // output scale is used to normalize final result, so we invert the scale here
- const double real_output_scale =
- norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift));
-
- // Represent the scales as fixed int32_t multipliers, and int32_t shifts
- QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
- QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
- QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
-
- // cker quant8 add is not implemented yet
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void AddLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void AddLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- addFloat32();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- addQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- addInt32();
- }
- else
- {
- throw std::runtime_error{"Add: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AddLayer.h b/runtime/onert/backend/cpu/ops/AddLayer.h
deleted file mode 100644
index 91030d93a..000000000
--- a/runtime/onert/backend/cpu/ops/AddLayer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AddLayer : public ::onert::exec::IFunction
-{
-public:
- AddLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void addFloat32();
-
- void addQuant8();
-
- void addInt32();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc b/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc
deleted file mode 100644
index 9c22c1c86..000000000
--- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "AvgPoolLayer.h"
-
-#include <cker/operation/AveragePool.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-#define AVGPOOLING_PARAMETERS \
- nnfw::cker::PoolParams op_params; \
- op_params.stride_height = _strideHeight; \
- op_params.stride_width = _strideWidth; \
- op_params.filter_height = _kernelHeight; \
- op_params.filter_width = _kernelWidth; \
- op_params.padding_values.height = (int8_t)_paddingTop; \
- op_params.padding_values.width = (int8_t)_paddingLeft;
-
-AvgPoolLayer::AvgPoolLayer()
- : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
- _activation(ir::Activation::NONE)
-{
- // DO NOTHING
-}
-
-void AvgPoolLayer::averagePoolFloat32()
-{
- AVGPOOLING_PARAMETERS
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::AveragePool(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-void AvgPoolLayer::averagePoolQuant8()
-{
- AVGPOOLING_PARAMETERS
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::AveragePool(op_params, getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void AvgPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output)
-{
- assert(input != nullptr);
- assert(output != nullptr);
-
- _input = input;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _kernelWidth = kernelWidth;
- _kernelHeight = kernelHeight;
- _activation = activation;
- _output = output;
-}
-
-void AvgPoolLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- averagePoolFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- averagePoolQuant8();
- }
- else
- {
- throw std::runtime_error{"AvgPool: unsupported data type"};
- }
-}
-
-#undef AVGPOOLING_PARAMETERS
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h b/runtime/onert/backend/cpu/ops/AvgPoolLayer.h
deleted file mode 100644
index d4e8f79e7..000000000
--- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class AvgPoolLayer : public ::onert::exec::IFunction
-{
-public:
- AvgPoolLayer();
-
-public:
- void averagePoolFloat32();
-
- void averagePoolQuant8();
-
- void configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
- uint32_t _kernelWidth;
- uint32_t _kernelHeight;
-
- ir::Activation _activation;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
new file mode 100644
index 000000000..f50c63375
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BinaryArithmeticLayer.h"
+
+#include <cker/operation/BinaryArithmeticOps.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+
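+// Invokes the cker binary arithmetic kernel for element type T, switching to the
+// broadcasting variant when ProcessBroadcastShapes reports that lhs/rhs require it.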
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T>
+void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+ const bool need_broadcast =
+ nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params);
+ if (need_broadcast)
+ {
+ nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>(
+ op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ return;
+ }
+
+ nnfw::cker::BinaryArithmeticOp<arithmetic_type>(
+ op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <nnfw::cker::BinaryArithmeticOpType arithmetic_type>
+std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam op_params)
+{
+ switch (lhs->data_type())
+ {
+ case OperandType::FLOAT32:
+ {
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_max = output_activation_max;
+ op_params.float_activation_min = output_activation_min;
+ return std::bind(&eval<arithmetic_type, float>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, op_params);
+ break;
+ }
+ case OperandType::INT32:
+ {
+ int32_t output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(activation, &output_activation_min, &output_activation_max);
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ return std::bind(eval<arithmetic_type, int32_t>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, op_params);
+ break;
+ }
+ default:
+ throw std::runtime_error{"BinaryArithmetic(generic): Unsupported data type"};
+ }
+}
+
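+// Fills op_params with the zero-points, fixed-point multipliers and shifts needed for
+// quantized (QUANT_UINT8_ASYMM) Add/Sub; the Sub path reuses this and negates input2_multiplier.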
+void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam *params)
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ // Parameters for scaled quantized computation
+ op_params.left_shift = 20;
+ // Zero-points of input and output tensors
+ op_params.input1_offset = -lhs->data_offset();
+ op_params.input2_offset = -rhs->data_offset();
+ op_params.output_offset = output->data_offset();
+ assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
+ assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
+ assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
+
+ // Compute normalized scale for lhs and rhs values,
+ // and represent it in 32-bit fixed point
+ const double norm_max_scale = 2 * std::max(lhs->data_scale(), rhs->data_scale());
+ const double real_lhs_scale = lhs->data_scale() / norm_max_scale;
+ const double real_rhs_scale = rhs->data_scale() / norm_max_scale;
+ // output scale is used to normalize final result, so we invert the scale here
+ const double real_output_scale =
+ norm_max_scale / (output->data_scale() * (1 << op_params.left_shift));
+
+ // Represent the scales as fixed int32_t multipliers, and int32_t shifts
+ QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
+ QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
+ QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+void setMulQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, ir::Activation activation,
+ nnfw::cker::BinaryArithmeticOpParam *params)
+{
+ int32_t output_activation_min, output_activation_max;
+ CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max);
+ nnfw::cker::BinaryArithmeticOpParam &op_params = *params;
+
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.input1_offset = -lhs->data_offset();
+ op_params.input2_offset = -rhs->data_offset();
+ op_params.output_offset = output->data_offset();
+
+ double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale();
+ QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
+}
+
+} // namespace
+
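+// Binds the concrete cker kernel (with its precomputed parameters) into _kernel at
+// configure time, so run() only needs to call _kernel(_lhs, _rhs, _output).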
+void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, const ir::Activation activation,
+ const ArithmeticType arithmetic_type)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _output = output;
+
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ switch (arithmetic_type)
+ {
+ case ArithmeticType::kAdd:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kSub:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ op_params.input2_multiplier *= -1;
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kMul:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ nnfw::cker::BinaryArithmeticOpParam op_params;
+ setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params);
+ _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>,
+ std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ op_params);
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(_lhs, activation,
+ op_params);
+ }
+ break;
+ case ArithmeticType::kDiv:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ throw std::runtime_error{
+ "BinaryArithmetic(Div): Div operation does not support quantization"};
+ }
+ else if (_lhs->data_type() == OperandType::INT32)
+ {
+ throw std::runtime_error{"BinaryArithmetic(Div): Unsupported data type"};
+ }
+ else
+ {
+ _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(_lhs, activation,
+ op_params);
+ }
+ break;
+ default:
+ throw std::runtime_error{"BinaryArithmetic: Unsupported BinaryArithmetic type"};
+ }
+}
+
+void BinaryArithmeticLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/DivLayer.h b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h
index 9411be76e..d6b33ad07 100644
--- a/runtime/onert/backend/cpu/ops/DivLayer.h
+++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
@@ -31,21 +31,25 @@ namespace cpu
namespace ops
{
-class DivLayer : public ::onert::exec::IFunction
+enum class ArithmeticType
+{
+ kAdd,
+ kSub,
+ kMul,
+ kDiv,
+};
+
+class BinaryArithmeticLayer : public ::onert::exec::IFunction
{
public:
- DivLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ BinaryArithmeticLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
{
// DO NOTHING
}
public:
- void divFloat32();
-
- void divQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
+ void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ const ir::Activation activation, const ArithmeticType arithmetic_type);
void run() override;
@@ -54,7 +58,7 @@ private:
const IPortableTensor *_rhs;
IPortableTensor *_output;
- ir::Activation _activation{ir::Activation::NONE};
+ std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -62,4 +66,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/CastLayer.cc b/runtime/onert/backend/cpu/ops/CastLayer.cc
deleted file mode 100644
index 497515606..000000000
--- a/runtime/onert/backend/cpu/ops/CastLayer.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CastLayer.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-CastLayer::CastLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CastLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-template <typename FromT, typename ToT> void CastLayer::castTensor(const FromT *in, ToT *out)
-{
- auto input_shape = getTensorShape(_input);
- auto output_shape = getTensorShape(_output);
- const auto num_elements = MatchingFlatSize(input_shape, output_shape);
-
- std::transform(in, in + num_elements, out, [](FromT a) { return static_cast<ToT>(a); });
-}
-
-template <typename FromT> void CastLayer::castPtr(const FromT *in, DataPtr out)
-{
- switch (_output->data_type())
- {
- case ir::DataType::FLOAT32:
- castTensor(in, out.f);
- return;
- case ir::DataType::INT32:
- castTensor(in, out.i32);
- return;
- case ir::DataType::UINT32:
- castTensor(in, out.u32);
- return;
- case ir::DataType::UINT8:
- castTensor(in, out.u8);
- return;
- case ir::DataType::BOOL8:
- castTensor(in, out.b);
- return;
- case ir::DataType::INT64:
- castTensor(in, out.i64);
- return;
- default:
- throw std::runtime_error("Not supported output type" +
- std::to_string((int)_output->data_type()));
- }
-}
-
-void CastLayer::run()
-{
- auto input_buf = _input->buffer();
- auto output_buf = _output->buffer();
- const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
- auto out = *reinterpret_cast<DataPtr *>(&output_buf);
-
- switch (_input->data_type())
- {
- case ir::DataType::FLOAT32:
- castPtr(in.f, out);
- return;
- case ir::DataType::INT32:
- castPtr(in.i32, out);
- return;
- case ir::DataType::UINT32:
- castPtr(in.u32, out);
- return;
- case ir::DataType::UINT8:
- castPtr(in.u8, out);
- return;
- case ir::DataType::BOOL8:
- castPtr(in.b, out);
- return;
- case ir::DataType::INT64:
- castPtr(in.i64, out);
- return;
- default:
- throw std::runtime_error("Cast: unsupported data type" +
- std::to_string((int)_input->data_type()));
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/CastLayer.h b/runtime/onert/backend/cpu/ops/CastLayer.h
deleted file mode 100644
index 290c722e2..000000000
--- a/runtime/onert/backend/cpu/ops/CastLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class CastLayer : public ::onert::exec::IFunction
-{
-public:
- CastLayer();
-
-public:
- template <typename FromT, typename ToT> void castTensor(const FromT *in, ToT *out);
- template <typename FromT> void castPtr(const FromT *in, DataPtr out);
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
index 2d5bbef1e..c057267d3 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -31,7 +31,8 @@ namespace ops
ConvolutionLayer::ConvolutionLayer()
: _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
_paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE),
+ _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1),
+ _dilationHeightFactor(1), _activation(ir::Activation::NONE),
_conv_kernel(new nnfw::cker::Conv()), _prepare(false)
{
// DO NOTHING
@@ -50,8 +51,8 @@ void ConvolutionLayer::convFloat32()
op_params.padding_values.height = _paddingTop;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
@@ -78,8 +79,8 @@ void ConvolutionLayer::convQuant8()
nnfw::cker::ConvParams op_params;
op_params.stride_width = _strideWidth;
op_params.stride_height = _strideHeight;
- op_params.dilation_width_factor = 1;
- op_params.dilation_height_factor = 1;
+ op_params.dilation_width_factor = _dilationWidthFactor;
+ op_params.dilation_height_factor = _dilationHeightFactor;
op_params.padding_type = getPaddingType(_paddingType);
op_params.padding_values.width = _paddingLeft;
op_params.padding_values.height = _paddingTop;
@@ -104,6 +105,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
const uint32_t paddingLeft, const uint32_t paddingRight,
const uint32_t paddingTop, const uint32_t paddingBottom,
const uint32_t strideWidth, const uint32_t strideHeight,
+ const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor,
const ir::Activation activation, IPortableTensor *output)
{
_input = input;
@@ -116,6 +119,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe
_paddingBottom = paddingBottom;
_strideWidth = strideWidth;
_strideHeight = strideHeight;
+ _dilationWidthFactor = dilationWidthFactor;
+ _dilationHeightFactor = dilationHeightFactor;
_activation = activation;
_output = output;
}
@@ -145,7 +150,8 @@ void ConvolutionLayer::run()
param_padding.param.bottom = _paddingBottom;
const auto padding =
- ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+ ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+ _dilationWidthFactor, _dilationHeightFactor);
_paddingLeft = padding.left;
_paddingRight = padding.right;
@@ -176,7 +182,8 @@ void ConvolutionLayer::prepare()
{
bool is_transposed = false;
kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()),
- getPaddingType(_paddingType), is_transposed);
+ getPaddingType(_paddingType), is_transposed, _dilationWidthFactor,
+ _dilationHeightFactor);
// Decrease reference of _kernel(weights) only when _kernel is constant
if (is_transposed)
diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
index 2833387c4..398892e65 100644
--- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
+++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h
@@ -56,7 +56,8 @@ public:
const IPortableTensor *bias, ir::PaddingType _paddingType,
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const ir::Activation activation,
+ const uint32_t strideHeight, const uint32_t dilationWidthFactor,
+ const uint32_t dilationHeightFactor, const ir::Activation activation,
IPortableTensor *output);
void run() override;
@@ -77,6 +78,8 @@ private:
uint32_t _strideWidth;
uint32_t _strideHeight;
+ uint32_t _dilationWidthFactor;
+ uint32_t _dilationHeightFactor;
ir::Activation _activation;
diff --git a/runtime/onert/backend/cpu/ops/CosLayer.cc b/runtime/onert/backend/cpu/ops/CosLayer.cc
deleted file mode 100644
index 9417019d5..000000000
--- a/runtime/onert/backend/cpu/ops/CosLayer.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "CosLayer.h"
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-CosLayer::CosLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void CosLayer::cosFloat32()
-{
- nnfw::cker::Cos(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void CosLayer::cosQuant8() { throw std::runtime_error{"NYI"}; }
-
-void CosLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void CosLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- cosFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- cosQuant8();
- }
- else
- {
- throw std::runtime_error{"Cos: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/CosLayer.h b/runtime/onert/backend/cpu/ops/CosLayer.h
deleted file mode 100644
index 1fadef718..000000000
--- a/runtime/onert/backend/cpu/ops/CosLayer.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class CosLayer : public ::onert::exec::IFunction
-{
-public:
- CosLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void cosFloat32();
- void cosQuant8();
-
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_COSLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/DivLayer.cc b/runtime/onert/backend/cpu/ops/DivLayer.cc
deleted file mode 100644
index 556c55e33..000000000
--- a/runtime/onert/backend/cpu/ops/DivLayer.cc
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DivLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void DivLayer::divFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool requires_broadcast = !HaveSameShapes(_lhs, _rhs);
- if (requires_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- }
- else
- {
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- }
-}
-
-void DivLayer::divQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- // op_params.quantized_activation_max = output_activation_max;
- // op_params.quantized_activation_min = output_activation_min;
-
- // cker quant8 div is not implemented yet
- throw std::runtime_error{"Div NYI for quantized"};
-}
-
-void DivLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void DivLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- divFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- divQuant8();
- }
- else
- {
- throw std::runtime_error{"Div: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
new file mode 100644
index 000000000..c1d63172b
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseActivationLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Logistic.h>
+#include <cker/operation/ReLU.h>
+#include <cker/operation/ReLU6.h>
+#include <cker/operation/Tanh.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+ElementwiseActivationLayer::ElementwiseActivationLayer()
+ : _input(nullptr), _output(nullptr), _kernel()
+{
+ // DO NOTHING
+}
+
+void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type)
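+// Builds a 256-entry table mapping every quantized uint8 input value to the quantized
+// activation result; used for Tanh and Logistic on QUANT_UINT8_ASYMM tensors.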
+{
+ const auto input_scale = static_cast<double>(_input->data_scale());
+ const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
+ const auto output_scale = static_cast<double>(_output->data_scale());
+ const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ float transformed = 0.f;
+ if (op_type == ElementwiseActivationType::kTanh)
+ {
+ transformed = std::tanh(dequantized);
+ }
+ else if (op_type == ElementwiseActivationType::kLogistic)
+ {
+ transformed = 1.0f / (1.0f + std::exp(-dequantized));
+ }
+ else
+ {
+ throw std::runtime_error("ElementwiseActivationLayer : unsupported activation type");
+ }
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
+ }
+}
+
+void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output));
+ const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer());
+ uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer());
+
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = _table[input_data[i]];
+ }
+}
+
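+// Selects the activation kernel from op_type, the tensor data type and the (alpha, beta)
+// clamp range: ReLU expects alpha=inf/beta=0, ReLU6 expects alpha=6/beta=0, and quantized
+// Tanh/Logistic go through the lookup table populated above.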
+void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ float alpha, float beta,
+ ElementwiseActivationType op_type)
+{
+ _input = input;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseActivationType::kLogistic:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(op_type);
+ _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+ std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::Logistic(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
+ }
+ break;
+ case ElementwiseActivationType::kReLU:
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ReLU(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else if (alpha == 6.f && beta == 0.f)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::ReLU6(getTensorShape(input),
+ reinterpret_cast<const float *>(input->buffer()),
+ reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error(
+ "ElementwiseActivationLayer : This layer suppports only ReLU(0-inf) and ReLU6(0-6)");
+ }
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(ReLU): unsupported data type"};
+ }
+ break;
+ case ElementwiseActivationType::kTanh:
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(op_type);
+ _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this,
+ std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (_input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = [](const IPortableTensor *input, IPortableTensor *output) {
+ nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+ };
+ }
+ else
+ {
+ throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error("ElementwiseActivationLayer: unsupported op type");
+ }
+}
+
+void ElementwiseActivationLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
index 35a184074..3ef580041 100644
--- a/runtime/onert/backend/cpu/ops/TanhLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
#include <backend/IPortableTensor.h>
@@ -30,26 +30,33 @@ namespace cpu
namespace ops
{
-class TanhLayer : public ::onert::exec::IFunction
+enum class ElementwiseActivationType
{
-public:
- TanhLayer();
+ kLogistic,
+ kReLU,
+ kTanh
+};
+class ElementwiseActivationLayer : public ::onert::exec::IFunction
+{
public:
- void tanhFloat32();
+ ElementwiseActivationLayer();
- void tanhQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output, float alpha, float beta,
+ const ElementwiseActivationType op_type);
void run() override;
- void PopulateLookupTable();
+ void PopulateLookupTable(const ElementwiseActivationType op_type);
+
+ void EvalUsingLookupTable(const IPortableTensor *input, IPortableTensor *output);
private:
const IPortableTensor *_input;
IPortableTensor *_output;
uint8_t _table[256];
+ std::function<void(const IPortableTensor *input, IPortableTensor *output)> _kernel;
};
} // namespace ops
@@ -57,4 +64,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
new file mode 100644
index 000000000..ea3c1e7cd
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseBinaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/LogicalOr.h>
+#include <cker/operation/MaxMin.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output)
+{
+ if (!HaveSameShapes(lhs, rhs))
+ {
+ nnfw::cker::LogicalOrBroadcast<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs),
+ reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+ }
+ else
+ {
+ nnfw::cker::LogicalOrElementwise<T>(
+ getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer()));
+ }
+}
+
+template <typename T>
+void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+{
+ nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output)
+{
+ nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()),
+ getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()),
+ getTensorShape(output), reinterpret_cast<T *>(output->buffer()));
+}
+
+bool haveSameQuantInfo(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ const IPortableTensor *output)
+{
+ return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) &&
+ (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset());
+}
+} // namespace
+
+void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
+ IPortableTensor *output, const ElementwiseBinaryType op_type)
+{
+ assert(lhs != nullptr);
+ assert(rhs != nullptr);
+ assert(output != nullptr);
+
+ _lhs = lhs;
+ _rhs = rhs;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseBinaryType::kLogicalOr:
+ if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalOrGeneric<bool>;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalOr: Unsupported data type"};
+ }
+ break;
+ case ElementwiseBinaryType::kMax:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ if (!haveSameQuantInfo(_lhs, _rhs, _output))
+ {
+ throw std::runtime_error("Max NYI for quantized");
+ }
+ _kernel = maximumGeneric<uint8_t>;
+ }
+ else if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = maximumGeneric<float>;
+ }
+ else
+ {
+ throw std::runtime_error{"Max: unsupported data type"};
+ }
+ break;
+ case ElementwiseBinaryType::kMin:
+ if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ if (!haveSameQuantInfo(_lhs, _rhs, _output))
+ {
+ throw std::runtime_error("Min NYI for quantized");
+ }
+ _kernel = minimumGeneric<uint8_t>;
+ }
+ else if (_lhs->data_type() == OperandType::INT32)
+ {
+ _kernel = minimumGeneric<int32_t>;
+ }
+ else if (_lhs->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = minimumGeneric<float>;
+ }
+ else
+ {
+ throw std::runtime_error{"Min: unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+ }
+}
+
+void ElementwiseBinaryLayer::run() { _kernel(_lhs, _rhs, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
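A hypothetical caller sketch for the merged binary layer (lhs, rhs and out stand for tensors prepared by the backend; not part of this patch):

  auto fn = std::make_unique<ops::ElementwiseBinaryLayer>();
  fn->configure(lhs, rhs, out, ops::ElementwiseBinaryType::kMax); // binds maximumGeneric<float> or <uint8_t>
  fn->run(); // runs the kernel selected at configure time on (_lhs, _rhs, _output)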
diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
index ed8dc5b0f..052747a4c 100644
--- a/runtime/onert/backend/cpu/ops/MaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
#include <backend/IPortableTensor.h>
@@ -30,20 +30,25 @@ namespace cpu
namespace ops
{
-class MaxLayer : public ::onert::exec::IFunction
+enum class ElementwiseBinaryType
+{
+ kLogicalAnd,
+ kLogicalOr,
+ kMax,
+ kMin,
+};
+
+class ElementwiseBinaryLayer : public ::onert::exec::IFunction
{
public:
- MaxLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
+ ElementwiseBinaryLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
{
// DO NOTHING
}
public:
- template <typename T> void maximum();
-
- void maxQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
+ void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output,
+ const ElementwiseBinaryType op_type);
void run() override;
@@ -51,6 +56,7 @@ private:
const IPortableTensor *_lhs;
const IPortableTensor *_rhs;
IPortableTensor *_output;
+ std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -58,4 +64,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
new file mode 100644
index 000000000..f8f89ab15
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ElementwiseUnaryLayer.h"
+
+#include "OperationUtils.h"
+
+#include <cker/operation/Elementwise.h>
+#include <cker/operation/Erf.h>
+#include <cker/operation/Exp.h>
+#include <cker/operation/LogicalNot.h>
+#include <cker/operation/Quantize.h>
+#include <cker/operation/Round.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+void absFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename FromT>
+void castPtr(const FromT *in, DataPtr out, int num_elements, ir::DataType data_type_out)
+{
+ switch (data_type_out)
+ {
+ case ir::DataType::FLOAT32:
+ std::transform(in, in + num_elements, out.f, [](FromT a) { return static_cast<float>(a); });
+ return;
+ case ir::DataType::INT32:
+ std::transform(in, in + num_elements, out.i32,
+ [](FromT a) { return static_cast<int32_t>(a); });
+ return;
+ case ir::DataType::UINT32:
+ std::transform(in, in + num_elements, out.u32,
+ [](FromT a) { return static_cast<uint32_t>(a); });
+ return;
+ case ir::DataType::UINT8:
+ std::transform(in, in + num_elements, out.u8,
+ [](FromT a) { return static_cast<uint8_t>(a); });
+ return;
+ case ir::DataType::BOOL8:
+ std::transform(in, in + num_elements, out.b, [](FromT a) { return static_cast<bool>(a); });
+ return;
+ case ir::DataType::INT64:
+ std::transform(in, in + num_elements, out.i64,
+ [](FromT a) { return static_cast<int64_t>(a); });
+ return;
+ default:
+ throw std::runtime_error("Cast: Not supported output type" +
+ std::to_string((int)data_type_out));
+ }
+}
+
+void cast(const IPortableTensor *input, IPortableTensor *output)
+{
+ auto input_buf = input->buffer();
+ auto output_buf = output->buffer();
+ const auto in = *reinterpret_cast<const DataPtr *>(&input_buf);
+ auto out = *reinterpret_cast<DataPtr *>(&output_buf);
+
+ auto input_shape = getTensorShape(input);
+ auto output_shape = getTensorShape(output);
+ const auto num_elements = MatchingFlatSize(input_shape, output_shape);
+
+ switch (input->data_type())
+ {
+ case ir::DataType::FLOAT32:
+ castPtr(in.f, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::INT32:
+ castPtr(in.i32, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::UINT32:
+ castPtr(in.u32, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::UINT8:
+ castPtr(in.u8, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::BOOL8:
+ castPtr(in.b, out, num_elements, output->data_type());
+ return;
+ case ir::DataType::INT64:
+ castPtr(in.i64, out, num_elements, output->data_type());
+ return;
+ default:
+ throw std::runtime_error("Cast: unsupported data type" +
+ std::to_string((int)input->data_type()));
+ }
+}
+
+void cosFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void expFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void erfFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void logicalNot(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<bool *>(output->buffer()));
+}
+
+void negFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Neg(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename InputT, typename OutputT>
+void affineQuantize(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()),
+ output->data_scale(), output->data_offset());
+}
+
+void roundFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+void sinFloat32(const IPortableTensor *input, IPortableTensor *output)
+{
+ nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()),
+ getTensorShape(output), reinterpret_cast<float *>(output->buffer()));
+}
+
+template <typename T> void zerosLikeGeneric(const IPortableTensor *input, IPortableTensor *output)
+{
+ if (!HaveSameShapes(input, output))
+ throw std::runtime_error{"ZerosLike: input and output shape don't match."};
+
+ auto element_size = getTensorShape(input).FlatSize();
+
+ memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T));
+}
+} // namespace
+
+void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTensor *output,
+ const ElementwiseUnaryType op_type)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ switch (op_type)
+ {
+ case ElementwiseUnaryType::kAbs:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = absFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Abs: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kCast:
+ _kernel = cast;
+ break;
+ case ElementwiseUnaryType::kCos:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = cosFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Cos: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kExp:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = expFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Exp: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kErf:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = erfFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Exp: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kLog:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = logFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Log: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kLogicalNot:
+ if ((input->data_type() == OperandType::BOOL8))
+ {
+ _kernel = logicalNot;
+ }
+ else
+ {
+ throw std::runtime_error{"LogicalNot: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kNeg:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = negFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Neg: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kQuantize:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = affineQuantize<float, uint8_t>;
+ }
+ else
+ {
+ throw std::runtime_error{"Quantize: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kRound:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = roundFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Round: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kRSqrt:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = rsqrtFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"RSqrt: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kSin:
+ if ((input->data_type() == OperandType::FLOAT32))
+ {
+ _kernel = sinFloat32;
+ }
+ else
+ {
+ throw std::runtime_error{"Sin: Unsupported data type"};
+ }
+ break;
+ case ElementwiseUnaryType::kZerosLike:
+ if (input->data_type() == OperandType::FLOAT32)
+ {
+ _kernel = zerosLikeGeneric<float>;
+ }
+ else if (input->data_type() == OperandType::INT32)
+ {
+ _kernel = zerosLikeGeneric<int32_t>;
+ }
+ else
+ {
+ throw std::runtime_error{"ZerosLike: Unsupported data type"};
+ }
+ break;
+ default:
+ throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"};
+ }
+}
+
+void ElementwiseUnaryLayer::run() { _kernel(_input, _output); }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
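The unary layer follows the same configure-time dispatch; a hypothetical caller sketch, assuming the tensors are already allocated by the backend:

  auto fn = std::make_unique<ops::ElementwiseUnaryLayer>();
  fn->configure(input, output, ops::ElementwiseUnaryType::kRSqrt); // binds rsqrtFloat32 for FLOAT32 input
  fn->run();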
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
index 994d17a30..74968386d 100644
--- a/runtime/onert/backend/cpu/ops/ReLU6Layer.h
+++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
#include <backend/IPortableTensor.h>
@@ -30,23 +30,41 @@ namespace cpu
namespace ops
{
-class ReLU6Layer : public ::onert::exec::IFunction
+enum class ElementwiseUnaryType
{
-public:
- ReLU6Layer();
+ kAbs,
+ kCast,
+ kCos,
+ kErf,
+ kExp,
+ kLog,
+ kLogicalNot,
+ kNeg,
+ kQuantize,
+ kRound,
+ kRSqrt,
+ kSin,
+ kZerosLike
+};
+class ElementwiseUnaryLayer : public ::onert::exec::IFunction
+{
public:
- void relu6Float32();
+ ElementwiseUnaryLayer() : _input(nullptr), _output(nullptr), _kernel()
+ {
+ // DO NOTHING
+ }
- void relu6Quant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
+public:
+ void configure(const IPortableTensor *input, IPortableTensor *output,
+ const ElementwiseUnaryType op_type);
void run() override;
private:
const IPortableTensor *_input;
IPortableTensor *_output;
+ std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -54,4 +72,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.cc b/runtime/onert/backend/cpu/ops/ExpLayer.cc
deleted file mode 100644
index 4dbec9cd5..000000000
--- a/runtime/onert/backend/cpu/ops/ExpLayer.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ExpLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Exp.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ExpLayer::ExpLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ExpLayer::expFloat32()
-{
- nnfw::cker::Exp(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ExpLayer::expQuant8()
-{
- // cker quant8 exp is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ExpLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ExpLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- expFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- expQuant8();
- }
- else
- {
- throw std::runtime_error{"Exp: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.h b/runtime/onert/backend/cpu/ops/ExpLayer.h
deleted file mode 100644
index cd27b0e40..000000000
--- a/runtime/onert/backend/cpu/ops/ExpLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class ExpLayer : public ::onert::exec::IFunction
-{
-public:
- ExpLayer();
-
-public:
- void expFloat32();
-
- void expQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogLayer.cc b/runtime/onert/backend/cpu/ops/LogLayer.cc
deleted file mode 100644
index 307c15bc4..000000000
--- a/runtime/onert/backend/cpu/ops/LogLayer.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogLayer::LogLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogLayer::logFloat32()
-{
- nnfw::cker::Log(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void LogLayer::logQuant8() { throw std::runtime_error{"NYI"}; }
-
-void LogLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void LogLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- logFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- logQuant8();
- }
- else
- {
- throw std::runtime_error{"Log: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogLayer.h b/runtime/onert/backend/cpu/ops/LogLayer.h
deleted file mode 100644
index 2f6b4b570..000000000
--- a/runtime/onert/backend/cpu/ops/LogLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogLayer : public ::onert::exec::IFunction
-{
-public:
- LogLayer();
-
-public:
- void logFloat32();
-
- void logQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
index 06dde4fc4..1d7ee6caa 100644
--- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc
@@ -34,6 +34,16 @@ LogSoftMaxLayer::LogSoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.
// DO NOTHING
}
+void LogSoftMaxLayer::PopulateLookupTable(const float kBeta)
+{
+ const float scale = -_input->data_scale() * kBeta;
+ const int32_t max_uint8 = std::numeric_limits<uint8_t>::max();
+ for (int32_t val = 0; val <= max_uint8; ++val)
+ {
+ _table[max_uint8 - val] = expf(scale * val);
+ }
+}
+
void LogSoftMaxLayer::logsoftmaxFloat32()
{
nnfw::cker::SoftmaxParams op_params;
@@ -46,7 +56,15 @@ void LogSoftMaxLayer::logsoftmaxFloat32()
void LogSoftMaxLayer::logsoftmaxQuant8()
{
- // NYI
+ nnfw::cker::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ op_params.axis = _axis;
+ op_params.table = _table;
+ op_params.zero_point = _output->data_offset();
+ op_params.scale = _output->data_scale();
+ nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input),
+ reinterpret_cast<const uint8_t *>(_input->buffer()),
+ getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
}
void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis,
@@ -56,6 +74,10 @@ void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta,
_output = output;
_beta = beta;
_axis = axis;
+ if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ PopulateLookupTable(_beta);
+ }
}
void LogSoftMaxLayer::run()
@@ -66,7 +88,7 @@ void LogSoftMaxLayer::run()
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- throw std::runtime_error{"LogSoftmax : NYI"};
+ logsoftmaxQuant8();
}
else
{
diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
index ba9deca17..1533f3361 100644
--- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
+++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h
@@ -45,12 +45,15 @@ public:
void run();
+ void PopulateLookupTable(const float kBeta);
+
private:
const IPortableTensor *_input;
IPortableTensor *_output;
float _beta;
int _axis;
+ float _table[256];
};
} // namespace ops
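For the quantized path, PopulateLookupTable(kBeta) with scale = -input_scale * kBeta expands to

  _table[255 + d] = expf(kBeta * input_scale * d)   for d = -val in [-255, 0],

so the table presumably caches exp(beta * input_scale * (q - q_max)) for every possible non-positive quantized difference, letting nnfw::cker::LogSoftmax build the softmax denominator via table lookups instead of per-element expf calls; the exact indexing lives inside the cker kernel.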
diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc b/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc
deleted file mode 100644
index f2192c148..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogicalNotLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/LogicalNot.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogicalNotLayer::LogicalNotLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogicalNotLayer::logicalNotBool8()
-{
- nnfw::cker::LogicalNot(getTensorShape(_input), reinterpret_cast<const bool *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<bool *>(_output->buffer()));
-}
-
-void LogicalNotLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void LogicalNotLayer::run()
-{
- if (_input->data_type() == OperandType::BOOL8)
- {
- logicalNotBool8();
- }
- else
- {
- throw std::runtime_error{"LogicalNot: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h b/runtime/onert/backend/cpu/ops/LogicalNotLayer.h
deleted file mode 100644
index 5543cca3d..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogicalNotLayer : public ::onert::exec::IFunction
-{
-public:
- LogicalNotLayer();
-
-public:
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void logicalNotBool8();
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc b/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc
deleted file mode 100644
index 5b7c9f6f0..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogicalOrLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/LogicalOr.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-void LogicalOrLayer::lorBool8()
-{
- if (!HaveSameShapes(_lhs, _rhs))
- {
- nnfw::cker::LogicalOrBroadcast<bool>(
- getTensorShape(_lhs), reinterpret_cast<const bool *>(_lhs->buffer()), getTensorShape(_rhs),
- reinterpret_cast<const bool *>(_rhs->buffer()), getTensorShape(_output),
- reinterpret_cast<bool *>(_output->buffer()));
- }
- else
- {
- nnfw::cker::LogicalOrElementwise<bool>(getTensorShape(_lhs),
- reinterpret_cast<const bool *>(_lhs->buffer()),
- reinterpret_cast<const bool *>(_rhs->buffer()),
- reinterpret_cast<bool *>(_output->buffer()));
- }
-}
-
-void LogicalOrLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void LogicalOrLayer::run()
-{
- if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8))
- {
- lorBool8();
- }
- else
- {
- throw std::runtime_error{"LogicalOr: Unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h b/runtime/onert/backend/cpu/ops/LogicalOrLayer.h
deleted file mode 100644
index efaf396e8..000000000
--- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in riting, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class LogicalOrLayer : public ::onert::exec::IFunction
-{
-public:
- LogicalOrLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // Nothing
- }
-
-public:
- void configure(const IPortableTensor *_lhs, const IPortableTensor *_rhs, IPortableTensor *output);
-
- void run() override;
-
-private:
- void lorBool8();
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.cc b/runtime/onert/backend/cpu/ops/LogisticLayer.cc
deleted file mode 100644
index 140ab4d2c..000000000
--- a/runtime/onert/backend/cpu/ops/LogisticLayer.cc
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LogisticLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Logistic.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-LogisticLayer::LogisticLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void LogisticLayer::populateLookupTable()
-{
- const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
- const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
- const float inverse_scale = 1 / output_scale;
- int32_t maxval = std::numeric_limits<uint8_t>::max();
- int32_t minval = std::numeric_limits<uint8_t>::min();
- for (int32_t val = minval; val <= maxval; ++val)
- {
- const float dequantized = input_scale * (val - input_zero_point);
- const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
- const float rescaled = std::round(transformed * inverse_scale);
- const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
- _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
- }
-}
-
-void LogisticLayer::logisticFloat32()
-{
- nnfw::cker::Logistic(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void LogisticLayer::logisticQuant8()
-{
- const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer());
-
- for (int i = 0; i < size; ++i)
- {
- output_data[i] = _table[input_data[i]];
- }
-}
-
-void LogisticLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-
- if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- if (_output->data_scale() != 1.f / 256)
- {
- throw std::runtime_error{"incorrect scale for output"};
- }
- populateLookupTable();
- }
-}
-
-void LogisticLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- logisticFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- logisticQuant8();
- }
- else
- {
- throw std::runtime_error{"Logistic: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.h b/runtime/onert/backend/cpu/ops/LogisticLayer.h
deleted file mode 100644
index cac77939d..000000000
--- a/runtime/onert/backend/cpu/ops/LogisticLayer.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class LogisticLayer : public ::onert::exec::IFunction
-{
-public:
- LogisticLayer();
-
-public:
- void logisticFloat32();
-
- void logisticQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
- void populateLookupTable();
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-
- uint8_t _table[256];
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.cc b/runtime/onert/backend/cpu/ops/MaxLayer.cc
deleted file mode 100644
index 9631983be..000000000
--- a/runtime/onert/backend/cpu/ops/MaxLayer.cc
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/MaxMin.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-template <typename T> void MaxLayer::maximum()
-{
- nnfw::cker::Max<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
-}
-
-void MaxLayer::maxQuant8()
-{
- if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale())
- {
- if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset())
- {
- return nnfw::cker::Max<uint8_t>(
- getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- }
- }
- throw std::runtime_error("Max NYI for quantized");
-}
-
-void MaxLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void MaxLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- maximum<float>();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- maxQuant8();
- }
- else
- {
- throw std::runtime_error{"Max: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc b/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc
deleted file mode 100644
index 1e983b408..000000000
--- a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MaxPoolLayer.h"
-
-#include <cker/operation/MaxPool.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-#define MAXPOOLING_PARAMETERS \
- nnfw::cker::PoolParams op_params; \
- op_params.stride_height = _strideHeight; \
- op_params.stride_width = _strideWidth; \
- op_params.filter_height = _kernelHeight; \
- op_params.filter_width = _kernelWidth; \
- op_params.padding_values.height = (int8_t)_paddingTop; \
- op_params.padding_values.width = (int8_t)_paddingLeft;
-
-MaxPoolLayer::MaxPoolLayer()
- : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0),
- _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0),
- _activation(ir::Activation::NONE)
-{
- // DO NOTHING
-}
-
-void MaxPoolLayer::maxPoolFloat32()
-{
- MAXPOOLING_PARAMETERS
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
-
- nnfw::cker::MaxPool(op_params, getTensorShape(_input),
- reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<float *>(_output->buffer()));
-}
-void MaxPoolLayer::maxPoolQuant8()
-{
- MAXPOOLING_PARAMETERS
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- op_params.quantized_activation_min = output_activation_min;
- op_params.quantized_activation_max = output_activation_max;
-
- nnfw::cker::MaxPool(op_params, getTensorShape(_input),
- reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_output),
- reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void MaxPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft,
- const uint32_t paddingRight, const uint32_t paddingTop,
- const uint32_t paddingBottom, const uint32_t strideWidth,
- const uint32_t strideHeight, const uint32_t kernelWidth,
- const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output)
-{
- _input = input;
- _paddingLeft = paddingLeft;
- _paddingRight = paddingRight;
- _paddingTop = paddingTop;
- _paddingBottom = paddingBottom;
- _strideWidth = strideWidth;
- _strideHeight = strideHeight;
- _kernelWidth = kernelWidth;
- _kernelHeight = kernelHeight;
- _activation = activation;
- _output = output;
-}
-
-void MaxPoolLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- maxPoolFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- maxPoolQuant8();
- }
- else
- {
- throw std::runtime_error{"MaxPool: unsupported data type"};
- }
-}
-
-#undef MAXPOOLING_PARAMETERS
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MinLayer.cc b/runtime/onert/backend/cpu/ops/MinLayer.cc
deleted file mode 100644
index 20859673b..000000000
--- a/runtime/onert/backend/cpu/ops/MinLayer.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MinLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/MaxMin.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-template <typename T> void MinLayer::minimum()
-{
- nnfw::cker::Min<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<T *>(_output->buffer()));
-}
-
-void MinLayer::minQuant8()
-{
- if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale())
- {
- if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset())
- {
- return nnfw::cker::Min<uint8_t>(
- getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- }
- }
- throw std::runtime_error("Min NYI for quantized");
-}
-
-void MinLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- IPortableTensor *output)
-{
- assert(lhs != nullptr);
- assert(rhs != nullptr);
- assert(output != nullptr);
-
- _lhs = lhs;
- _rhs = rhs;
- _output = output;
-}
-
-void MinLayer::run()
-{
- if (_lhs->data_type() == OperandType::FLOAT32)
- {
- minimum<float>();
- }
- else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- minQuant8();
- }
- else if (_lhs->data_type() == OperandType::INT32)
- {
- minimum<int32_t>();
- }
- else
- {
- throw std::runtime_error{"Min: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MinLayer.h b/runtime/onert/backend/cpu/ops/MinLayer.h
deleted file mode 100644
index 9bd114e54..000000000
--- a/runtime/onert/backend/cpu/ops/MinLayer.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class MinLayer : public ::onert::exec::IFunction
-{
-public:
- MinLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- template <typename T> void minimum();
-
- void minQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_MINLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/MulLayer.cc b/runtime/onert/backend/cpu/ops/MulLayer.cc
deleted file mode 100644
index eef73edf3..000000000
--- a/runtime/onert/backend/cpu/ops/MulLayer.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "MulLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void MulLayer::mulFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void MulLayer::mulQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
-
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
-
- double real_multiplier = _lhs->data_scale() * _rhs->data_scale() / _output->data_scale();
- QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift);
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void MulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void MulLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- mulFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- mulQuant8();
- }
- else
- {
- throw std::runtime_error{"Mul: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/MulLayer.h b/runtime/onert/backend/cpu/ops/MulLayer.h
deleted file mode 100644
index 2c4a98875..000000000
--- a/runtime/onert/backend/cpu/ops/MulLayer.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class MulLayer : public ::onert::exec::IFunction
-{
-public:
- MulLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void mulFloat32();
-
- void mulQuant8();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_MULLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/NegLayer.cc b/runtime/onert/backend/cpu/ops/NegLayer.cc
deleted file mode 100644
index 2cb95b771..000000000
--- a/runtime/onert/backend/cpu/ops/NegLayer.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "NegLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-NegLayer::NegLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void NegLayer::negFloat32()
-{
- nnfw::cker::Neg(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void NegLayer::negQuant8() { throw std::runtime_error{"NYI"}; }
-
-void NegLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void NegLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- negFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- negQuant8();
- }
- else
- {
- throw std::runtime_error{"Neg: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/NegLayer.h b/runtime/onert/backend/cpu/ops/NegLayer.h
deleted file mode 100644
index addf84ec2..000000000
--- a/runtime/onert/backend/cpu/ops/NegLayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class NegLayer : public ::onert::exec::IFunction
-{
-public:
- NegLayer();
-
-public:
- void negFloat32();
-
- void negQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/PoolLayer.cc b/runtime/onert/backend/cpu/ops/PoolLayer.cc
new file mode 100644
index 000000000..85d02a751
--- /dev/null
+++ b/runtime/onert/backend/cpu/ops/PoolLayer.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PoolLayer.h"
+
+#include <cker/operation/AveragePool.h>
+#include <cker/operation/MaxPool.h>
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+template <typename T>
+void avgPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ nnfw::cker::AveragePool<T>(params, getTensorShape(input),
+ reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+void maxPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input,
+ IPortableTensor *output)
+{
+ nnfw::cker::MaxPool<T>(params, getTensorShape(input),
+ reinterpret_cast<const T *>(input->buffer()), getTensorShape(output),
+ reinterpret_cast<T *>(output->buffer()));
+}
+
+template <typename T>
+std::function<void(const IPortableTensor *, IPortableTensor *)>
+generateKernelGeneric(const nnfw::cker::PoolParams &params, PoolType op_type)
+{
+ if (op_type == PoolType::kAvg)
+ {
+ return std::bind(&avgPool2D<T>, params, std::placeholders::_1, std::placeholders::_2);
+ }
+ else if (op_type == PoolType::kMax)
+ {
+ return std::bind(&maxPool2D<T>, params, std::placeholders::_1, std::placeholders::_2);
+ }
+ else
+ {
+ throw std::runtime_error{"Pool: unsupported pool type"};
+ }
+}
+} // namespace
+
+PoolLayer::PoolLayer() : _input(nullptr), _output(nullptr), _kernel()
+{
+ // DO NOTHING
+}
+
+#define POOLING_PARAMETERS \
+ nnfw::cker::PoolParams op_params; \
+ op_params.stride_height = strideHeight; \
+ op_params.stride_width = strideWidth; \
+ op_params.filter_height = kernelHeight; \
+ op_params.filter_width = kernelWidth; \
+ op_params.padding_values.height = (int8_t)paddingTop; \
+ op_params.padding_values.width = (int8_t)paddingLeft;
+
+void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t,
+ const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const ir::Activation activation,
+ IPortableTensor *output, const PoolType op_type)
+{
+ assert(input != nullptr);
+ assert(output != nullptr);
+
+ _input = input;
+ _output = output;
+
+ POOLING_PARAMETERS
+ if (_input->data_type() == OperandType::FLOAT32)
+ {
+ float output_activation_min = 0;
+ float output_activation_max = 0;
+ CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
+ _kernel = generateKernelGeneric<float>(op_params, op_type);
+ }
+ else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
+ {
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(activation, _output, &output_activation_min,
+ &output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ _kernel = generateKernelGeneric<uint8_t>(op_params, op_type);
+ }
+ else
+ {
+ throw std::runtime_error{"Pool: unsupported data type"};
+ }
+}
+
+void PoolLayer::run() { _kernel(_input, _output); }
+
+#undef POOLING_PARAMETERS
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
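
The new PoolLayer resolves both the data type and the pool type once in configure() and stores the result as a std::function, so run() is a single indirect call. A stripped-down sketch of that configure-time dispatch, with plain float buffers standing in for IPortableTensor and only two pool types; makeKernel and Kernel are illustrative names, not backend API:

#include <functional>
#include <stdexcept>

enum class PoolType { kAvg, kMax };

using Kernel = std::function<void(const float *, float *, int)>;

// Chosen once at configure time; the returned functor is what run() invokes.
Kernel makeKernel(PoolType type)
{
  if (type == PoolType::kAvg)
    return [](const float *in, float *out, int n) {
      float sum = 0.f;
      for (int i = 0; i < n; ++i)
        sum += in[i];
      out[0] = sum / n; // global average as a stand-in for a real pooling window
    };
  if (type == PoolType::kMax)
    return [](const float *in, float *out, int n) {
      float best = in[0];
      for (int i = 1; i < n; ++i)
        best = in[i] > best ? in[i] : best;
      out[0] = best; // global max as a stand-in for a real pooling window
    };
  throw std::runtime_error{"Pool: unsupported pool type"};
}

// configure(): Kernel k = makeKernel(PoolType::kMax);
// run():       k(input_buffer, output_buffer, size);
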
diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h b/runtime/onert/backend/cpu/ops/PoolLayer.h
index 4c5109f64..b37835946 100644
--- a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h
+++ b/runtime/onert/backend/cpu/ops/PoolLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
@@ -31,22 +31,25 @@ namespace cpu
namespace ops
{
-class MaxPoolLayer : public ::onert::exec::IFunction
+enum class PoolType
{
-public:
- MaxPoolLayer();
+ kAvg,
+ kL2,
+ kMax,
+};
+class PoolLayer : public ::onert::exec::IFunction
+{
public:
- void maxPoolFloat32();
-
- void maxPoolQuant8();
+ PoolLayer();
+public:
void configure(const IPortableTensor *input, const uint32_t paddingLeft,
const uint32_t paddingRight, const uint32_t paddingTop,
const uint32_t paddingBottom, const uint32_t strideWidth,
const uint32_t strideHeight, const uint32_t kernelWidth,
const uint32_t kernelHeight, const ir::Activation activation,
- IPortableTensor *output);
+ IPortableTensor *output, const PoolType op_type);
void run() override;
@@ -54,17 +57,7 @@ private:
const IPortableTensor *_input;
IPortableTensor *_output;
- uint32_t _paddingLeft;
- uint32_t _paddingTop;
- uint32_t _paddingRight;
- uint32_t _paddingBottom;
-
- uint32_t _strideWidth;
- uint32_t _strideHeight;
- uint32_t _kernelWidth;
- uint32_t _kernelHeight;
-
- ir::Activation _activation;
+ std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel;
};
} // namespace ops
@@ -72,4 +65,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
deleted file mode 100644
index 45fc148bf..000000000
--- a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "QuantizeLayer.h"
-
-#include <cker/operation/Quantize.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize()
-{
- nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()),
- _output->data_scale(), _output->data_offset());
-}
-
-void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void QuantizeLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- affineQuantize<float, uint8_t>();
- }
- else
- {
- throw std::runtime_error{"Quantize: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h
deleted file mode 100644
index b4e7aca40..000000000
--- a/runtime/onert/backend/cpu/ops/QuantizeLayer.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class QuantizeLayer : public ::onert::exec::IFunction
-{
-public:
- QuantizeLayer();
-
-public:
- template <typename InputT, typename OutputT> void affineQuantize();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.cc b/runtime/onert/backend/cpu/ops/RankLayer.cc
index 185d7554e..4690bdf72 100644
--- a/runtime/onert/backend/cpu/ops/RoundLayer.cc
+++ b/runtime/onert/backend/cpu/ops/RankLayer.cc
@@ -14,12 +14,10 @@
* limitations under the License.
*/
-#include "RoundLayer.h"
+#include "RankLayer.h"
#include "OperationUtils.h"
-#include <cker/operation/Round.h>
-
namespace onert
{
namespace backend
@@ -28,32 +26,28 @@ namespace cpu
{
namespace ops
{
-RoundLayer::RoundLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-void RoundLayer::roundFloat32()
+RankLayer::RankLayer() : _input(nullptr), _output(nullptr)
{
- nnfw::cker::Round(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ // DO NOTHING
}
-void RoundLayer::configure(const IPortableTensor *input, IPortableTensor *output)
+void RankLayer::configure(const IPortableTensor *input, IPortableTensor *output)
{
_input = input;
_output = output;
}
-void RoundLayer::run()
+void RankLayer::run()
{
- if (_input->data_type() == OperandType::FLOAT32)
+ if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32)
{
- roundFloat32();
+ int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer());
+ output_data[0] = _input->num_dimensions();
}
else
{
- throw std::runtime_error{"Round: unsupported data type"};
+ throw std::runtime_error{"Rank: unsupported data type"};
}
}
diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h b/runtime/onert/backend/cpu/ops/RankLayer.h
index 054894203..6282ceb07 100644
--- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h
+++ b/runtime/onert/backend/cpu/ops/RankLayer.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
+#ifndef __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
+#define __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
#include <backend/IPortableTensor.h>
@@ -29,11 +29,13 @@ namespace cpu
{
namespace ops
{
-class ZerosLikeLayer : public ::onert::exec::IFunction
+
+class RankLayer : public ::onert::exec::IFunction
{
public:
- ZerosLikeLayer();
+ RankLayer();
+public:
void configure(const IPortableTensor *input, IPortableTensor *output);
void run() override;
@@ -48,4 +50,4 @@ private:
} // namespace backend
} // namespace onert
-#endif // __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__
+#endif // __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc
deleted file mode 100644
index 26eb35e0d..000000000
--- a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReLU6Layer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/ReLU6.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ReLU6Layer::relu6Float32()
-{
- nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ReLU6Layer::relu6Quant8()
-{
- // cker quant8 relu is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ReLU6Layer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- relu6Float32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- relu6Quant8();
- }
- else
- {
- throw std::runtime_error{"ReLU6: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.cc b/runtime/onert/backend/cpu/ops/ReLULayer.cc
deleted file mode 100644
index cb4529feb..000000000
--- a/runtime/onert/backend/cpu/ops/ReLULayer.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ReLULayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/ReLU.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-ReLULayer::ReLULayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ReLULayer::reluFloat32()
-{
- nnfw::cker::ReLU(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void ReLULayer::reluQuant8()
-{
- // cker quant8 relu is not implemented yet
- throw std::runtime_error{"NYI"};
-}
-
-void ReLULayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ReLULayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- reluFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- reluQuant8();
- }
- else
- {
- throw std::runtime_error{"ReLU: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.h b/runtime/onert/backend/cpu/ops/ReLULayer.h
deleted file mode 100644
index 4ba2be772..000000000
--- a/runtime/onert/backend/cpu/ops/ReLULayer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class ReLULayer : public ::onert::exec::IFunction
-{
-public:
- ReLULayer();
-
-public:
- void reluFloat32();
-
- void reluQuant8();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_RELULAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
index fe22dbed7..bb5f85d60 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc
@@ -49,27 +49,31 @@ void evalLogic(const IPortableTensor *input, IPortableTensor *output, const std:
}
template <typename T>
-void evalType(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes,
- bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+evalType(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (reduce_type)
{
case ReduceType::kSum:
- return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(0), reduce_kernel,
- [](const T current, const T in) -> T { return in + current; });
+ return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, static_cast<T>(0), reduce_kernel,
+ [](const T current, const T in) -> T { return in + current; });
break;
case ReduceType::kProd:
- return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(1), reduce_kernel,
- [](const T current, const T in) -> T { return in * current; });
+ return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, static_cast<T>(1), reduce_kernel,
+ [](const T current, const T in) -> T { return in * current; });
break;
case ReduceType::kMax:
- return evalLogic<T>(
- input, output, axes, keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
+ return std::bind(
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel,
[](const T current, const T in) -> T { return (in > current) ? in : current; });
break;
case ReduceType::kMin:
- return evalLogic<T>(
- input, output, axes, keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
+ return std::bind(
+ &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3,
+ keep_dims, std::numeric_limits<T>::max(), reduce_kernel,
[](const T current, const T in) -> T { return (in < current) ? in : current; });
break;
default:
@@ -79,44 +83,44 @@ void evalType(const IPortableTensor *input, IPortableTensor *output, const std::
// Template specialization for bool type
template <>
-void evalType<bool>(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel,
- ReduceType reduce_type)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+evalType<bool>(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (reduce_type)
{
case ReduceType::kAny:
- return evalLogic<bool>(
- input, output, axes, keep_dims, false, reduce_kernel,
- [](const bool current, const bool in) -> bool { return in || current; });
+ return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, false, reduce_kernel,
+ [](const bool current, const bool in) -> bool { return in || current; });
break;
case ReduceType::kAll:
- return evalLogic<bool>(
- input, output, axes, keep_dims, true, reduce_kernel,
- [](const bool current, const bool in) -> bool { return in && current; });
+ return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, true, reduce_kernel,
+ [](const bool current, const bool in) -> bool { return in && current; });
break;
default:
throw std::runtime_error{"Reduce: Unsupported reduce type"};
}
}
-template <ReduceType reduce_type>
-void evalGeneric(const IPortableTensor *input, IPortableTensor *output,
- const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel)
+std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)>
+generateKernelGeneric(const IPortableTensor *input, bool keep_dims,
+ nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type)
{
switch (input->data_type())
{
case OperandType::FLOAT32:
- return evalType<float>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<float>(keep_dims, reduce_kernel, reduce_type);
case OperandType::INT32:
- return evalType<int32_t>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<int32_t>(keep_dims, reduce_kernel, reduce_type);
case OperandType::BOOL8:
- return evalType<bool>(input, output, axes, keep_dims, reduce_kernel, reduce_type);
+ return evalType<bool>(keep_dims, reduce_kernel, reduce_type);
default:
throw std::runtime_error{"Reduce(generic): unsupported data type"};
}
}
+// TODO Refine this function
void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
const std::vector<int> &axes, bool keep_dims,
nnfw::cker::Reduce &reduce_kernel)
@@ -146,14 +150,15 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output,
return;
}
- evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel);
+ const auto kernel = generateKernelGeneric(input, keep_dims, reduce_kernel, ReduceType::kSum);
+ kernel(input, output, axes);
}
} // namespace
ReduceLayer::ReduceLayer()
- : _input(nullptr), _axes(nullptr), _output(nullptr), _reduceType(ReduceType::kAny),
- _keep_dims(false), _reduce_kernel(new nnfw::cker::Reduce())
+ : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()),
+ _kernel()
{
// DO NOTHING
}
@@ -166,43 +171,44 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor
_input = input;
_axes = axes;
_output = output;
- _reduceType = reduceType;
- _keep_dims = keep_dims;
-}
-void ReduceLayer::run()
-{
- const auto axes = getReducerAxes(_axes);
- switch (_reduceType)
+ switch (reduceType)
{
case ReduceType::kSum:
if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
- evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = std::bind(&evalSumQuantized, std::placeholders::_1, std::placeholders::_2,
+ std::placeholders::_3, keep_dims, *_reduce_kernel);
return;
}
- evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kSum);
break;
case ReduceType::kProd:
- evalGeneric<ReduceType::kProd>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kProd);
break;
case ReduceType::kMax:
- evalGeneric<ReduceType::kMax>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMax);
break;
case ReduceType::kMin:
- evalGeneric<ReduceType::kMin>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMin);
break;
case ReduceType::kAny:
- evalGeneric<ReduceType::kAny>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAny);
break;
case ReduceType::kAll:
- evalGeneric<ReduceType::kAll>(_input, _output, axes, _keep_dims, *_reduce_kernel);
+ _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll);
break;
default:
throw std::runtime_error{"ReduceSum: Unsupported reduce type"};
}
}
+void ReduceLayer::run()
+{
+ const auto axes = getReducerAxes(_axes);
+ _kernel(_input, _output, axes);
+}
+
} // namespace ops
} // namespace cpu
} // namespace backend
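
Behind the evalType() binding above, every reduce type is just an identity value plus a binary functor applied by one generic loop. A small sketch of that shape, using std::vector in place of tensors; reduceAll is a hypothetical helper:

#include <functional>
#include <limits>
#include <vector>

// Generic fold: the identity value and the binary op are the only things that
// differ between sum, prod, max and min.
template <typename T>
T reduceAll(const std::vector<T> &v, T init, const std::function<T(T, T)> &op)
{
  T acc = init;
  for (const T &x : v)
    acc = op(acc, x);
  return acc;
}

// sum: reduceAll<float>(v, 0.f, [](float c, float in) { return c + in; });
// max: reduceAll<float>(v, std::numeric_limits<float>::lowest(),
//                       [](float c, float in) { return in > c ? in : c; });
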
diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.h b/runtime/onert/backend/cpu/ops/ReduceLayer.h
index 8e7bcdb07..332d399bd 100644
--- a/runtime/onert/backend/cpu/ops/ReduceLayer.h
+++ b/runtime/onert/backend/cpu/ops/ReduceLayer.h
@@ -65,10 +65,11 @@ private:
const IPortableTensor *_input;
const IPortableTensor *_axes;
IPortableTensor *_output;
- ReduceType _reduceType;
- bool _keep_dims;
std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel;
+ std::function<void(const IPortableTensor *input, IPortableTensor *output,
+ const std::vector<int> &axes)>
+ _kernel;
};
} // namespace ops
diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.h b/runtime/onert/backend/cpu/ops/RoundLayer.h
deleted file mode 100644
index fc6a46c0d..000000000
--- a/runtime/onert/backend/cpu/ops/RoundLayer.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class RoundLayer : public ::onert::exec::IFunction
-{
-public:
- RoundLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void roundFloat32();
-
-private:
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc b/runtime/onert/backend/cpu/ops/RsqrtLayer.cc
deleted file mode 100644
index 0bd468f96..000000000
--- a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "RsqrtLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-RsqrtLayer::RsqrtLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void RsqrtLayer::rsqrtFloat32()
-{
- nnfw::cker::Rsqrt(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void RsqrtLayer::rsqrtQuant8() { throw std::runtime_error{"NYI : QASYMM8 not supported"}; }
-
-void RsqrtLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void RsqrtLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- rsqrtFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- rsqrtQuant8();
- }
- else
- {
- throw std::runtime_error{"Rsqrt: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.h b/runtime/onert/backend/cpu/ops/RsqrtLayer.h
deleted file mode 100644
index 49abbb08d..000000000
--- a/runtime/onert/backend/cpu/ops/RsqrtLayer.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class RsqrtLayer : public ::onert::exec::IFunction
-{
-public:
- RsqrtLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void rsqrtFloat32();
- void rsqrtQuant8();
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/SinLayer.cc b/runtime/onert/backend/cpu/ops/SinLayer.cc
deleted file mode 100644
index 2a6b11753..000000000
--- a/runtime/onert/backend/cpu/ops/SinLayer.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SinLayer.h"
-#include "OperationUtils.h"
-
-#include <cker/operation/Elementwise.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-SinLayer::SinLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void SinLayer::sinFloat32()
-{
- nnfw::cker::Sin(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SinLayer::sinQuant8() { throw std::runtime_error{"NYI"}; }
-
-void SinLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void SinLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- sinFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- sinQuant8();
- }
- else
- {
- throw std::runtime_error{"Sin: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/SinLayer.h b/runtime/onert/backend/cpu/ops/SinLayer.h
deleted file mode 100644
index 348350f41..000000000
--- a/runtime/onert/backend/cpu/ops/SinLayer.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
-
-#include <backend/IPortableTensor.h>
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-class SinLayer : public ::onert::exec::IFunction
-{
-public:
- SinLayer();
-
- void configure(const IPortableTensor *input, IPortableTensor *output);
-
- void run() override;
-
-private:
- void sinFloat32();
- void sinQuant8();
-
- const IPortableTensor *_input;
- IPortableTensor *_output;
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_SINLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
index 6e2bb584a..095e67abc 100644
--- a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
+++ b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc
@@ -34,55 +34,23 @@ SoftMaxLayer::SoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.0)
// DO NOTHING
}
-// Performs softmax along the input of size (input_size * batch_size).
-void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
- float *out)
+void SoftMaxLayer::softmaxFloat32()
{
- assert(input_size > 0);
-
- // For each batch
- for (int b = 0; b < batch_size; b++)
+ if (getNumberOfDimensions(_input) == 1)
{
- // Find the max coeff.
- float max_coeff = in[0];
- for (int i = 1; i < input_size; i++)
- {
- if (in[i] > max_coeff)
- max_coeff = in[i];
- }
-
- // Compute the normalized sum of exps.
- float exp_sum = 0.0;
- for (int i = 0; i < input_size; i++)
- {
- out[i] = std::exp((in[i] - max_coeff) * beta);
- exp_sum += out[i];
- }
-
- // Divide by the sum of exps.
- float reciprocal_sum_exp = 1.f / exp_sum;
- for (int i = 0; i < input_size; i++)
- {
- out[i] *= reciprocal_sum_exp;
- }
-
- // Advance in and out pointers for the next batch.
- in += input_size;
- out += input_size;
+ uint32_t input_size = getNumberOfElements(_input);
+ nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta,
+ reinterpret_cast<float *>(_output->buffer()));
}
-}
-
-void SoftMaxLayer::softmaxFloat32()
-{
- if (getNumberOfDimensions(_input) == 2)
+ else if (getNumberOfDimensions(_input) == 2)
{
uint32_t batch_size = getSizeOfDimension(_input, 0);
if (batch_size == 0)
throw std::runtime_error("batch_size should not be 0");
uint32_t input_size = getNumberOfElements(_input) / batch_size;
- Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, _beta,
- reinterpret_cast<float *>(_output->buffer()));
+ nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size,
+ _beta, reinterpret_cast<float *>(_output->buffer()));
}
else if (getNumberOfDimensions(_input) == 4)
{
@@ -94,7 +62,7 @@ void SoftMaxLayer::softmaxFloat32()
}
else
{
- throw std::runtime_error{"only 2D and 4D tensors supported"};
+ throw std::runtime_error{"only 1D, 2D and 4D tensors supported"};
}
}
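
The 1D branch added above just calls nnfw::cker::Softmax with batch_size fixed to 1, i.e. a single numerically stable softmax over the whole tensor. A hedged sketch of what that computes, mirroring the local helper removed by this change; for in = {1.f, 2.f, 3.f} and beta = 1.f the result is roughly {0.090, 0.245, 0.665}:

#include <algorithm>
#include <cmath>

// Numerically stable softmax over one row: subtract the max before
// exponentiating, then normalize by the sum of exponentials.
void softmax1D(const float *in, int n, float beta, float *out)
{
  const float max_coeff = *std::max_element(in, in + n);
  float sum = 0.f;
  for (int i = 0; i < n; ++i)
  {
    out[i] = std::exp((in[i] - max_coeff) * beta);
    sum += out[i];
  }
  for (int i = 0; i < n; ++i)
    out[i] /= sum;
}
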
diff --git a/runtime/onert/backend/cpu/ops/SubLayer.cc b/runtime/onert/backend/cpu/ops/SubLayer.cc
deleted file mode 100644
index 597d52952..000000000
--- a/runtime/onert/backend/cpu/ops/SubLayer.cc
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SubLayer.h"
-
-#include <cker/operation/BinaryArithmeticOps.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-void SubLayer::subFloat32()
-{
- float output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.float_activation_max = output_activation_max;
- op_params.float_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void SubLayer::subInt32()
-{
- int32_t output_activation_min = 0, output_activation_max = 0;
- CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer()));
-}
-
-void SubLayer::subQuant8()
-{
- int32_t output_activation_min, output_activation_max;
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
- nnfw::cker::BinaryArithmeticOpParam op_params;
- op_params.quantized_activation_max = output_activation_max;
- op_params.quantized_activation_min = output_activation_min;
- // Parameters for scaled quantized computation
- op_params.left_shift = 20;
- // Zero-points of input and output tensors
- op_params.input1_offset = -_lhs->data_offset();
- op_params.input2_offset = -_rhs->data_offset();
- op_params.output_offset = _output->data_offset();
- assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255));
- assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255));
- assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255));
-
- // Compute normalized scale for _lhs and _rhs values,
- // and represent in 32-bit fixed point
- const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale());
- const double real_lhs_scale = _lhs->data_scale() / norm_max_scale;
- const double real_rhs_scale = _rhs->data_scale() / norm_max_scale;
- // output scale is used to normalize final result, so we invert the scale here
- const double real_output_scale =
- norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift));
-
- // Represent the scales as fixed int32_t multipliers, and int32_t shifts
- QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift);
- QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift);
- op_params.input2_multiplier *= -1;
- QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift);
-
- const bool need_broadcast =
- nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params);
- if (need_broadcast)
- {
- nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
- return;
- }
-
- nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>(
- op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()),
- getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
-}
-
-void SubLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output)
-{
- _lhs = lhs;
- _rhs = rhs;
- _activation = activation;
- _output = output;
-}
-
-void SubLayer::run()
-{
- if (_output->data_type() == OperandType::FLOAT32)
- {
- subFloat32();
- }
- else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- subQuant8();
- }
- else if (_output->data_type() == OperandType::INT32)
- {
- subInt32();
- }
- else
- {
- throw std::runtime_error{"Sub: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/SubLayer.h b/runtime/onert/backend/cpu/ops/SubLayer.h
deleted file mode 100644
index 86f32ca6d..000000000
--- a/runtime/onert/backend/cpu/ops/SubLayer.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
-#define __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
-
-#include <backend/IPortableTensor.h>
-#include "OperationUtils.h"
-
-#include <exec/IFunction.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-class SubLayer : public ::onert::exec::IFunction
-{
-public:
- SubLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr)
- {
- // DO NOTHING
- }
-
-public:
- void subFloat32();
-
- void subQuant8();
-
- void subInt32();
-
- void configure(const IPortableTensor *lhs, const IPortableTensor *rhs,
- const ir::Activation activation, IPortableTensor *output);
-
- void run() override;
-
-private:
- const IPortableTensor *_lhs;
- const IPortableTensor *_rhs;
- IPortableTensor *_output;
-
- ir::Activation _activation{ir::Activation::NONE};
-};
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__
diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.cc b/runtime/onert/backend/cpu/ops/TanhLayer.cc
deleted file mode 100644
index 910ac1f41..000000000
--- a/runtime/onert/backend/cpu/ops/TanhLayer.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TanhLayer.h"
-
-#include "OperationUtils.h"
-
-#include <cker/operation/Tanh.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-
-TanhLayer::TanhLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void TanhLayer::PopulateLookupTable()
-{
- const auto input_scale = static_cast<double>(_input->data_scale());
- const auto input_zero_point = static_cast<int32_t>(_input->data_offset());
- const auto output_scale = static_cast<double>(_output->data_scale());
- const auto output_zero_point = static_cast<int32_t>(_output->data_offset());
- const float inverse_scale = 1 / output_scale;
- int32_t maxval = std::numeric_limits<uint8_t>::max();
- int32_t minval = std::numeric_limits<uint8_t>::min();
- for (int32_t val = minval; val <= maxval; ++val)
- {
- const float dequantized = input_scale * (val - input_zero_point);
- const float transformed = std::tanh(dequantized);
- const float rescaled = std::round(transformed * inverse_scale);
- const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
- _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval));
- }
-}
-
-void TanhLayer::tanhFloat32()
-{
- nnfw::cker::Tanh(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
-}
-
-void TanhLayer::tanhQuant8()
-{
- const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output));
- const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer());
- uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer());
-
- for (int i = 0; i < size; ++i)
- {
- output_data[i] = _table[input_data[i]];
- }
-}
-
-void TanhLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
- if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- PopulateLookupTable();
- }
-}
-
-void TanhLayer::run()
-{
- if (_input->data_type() == OperandType::FLOAT32)
- {
- tanhFloat32();
- }
- else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
- {
- tanhQuant8();
- }
- else
- {
- throw std::runtime_error{"Tanh: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert
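
The removed tanhQuant8() relies on a 256-entry lookup table built once in configure(): every possible uint8 input is dequantized, passed through tanh, and requantized, so run() is a table lookup per element. A minimal sketch of that table construction, with scales and zero points as plain parameters rather than tensor metadata; buildTanhTable is an illustrative name:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Dequantize each possible input value, apply tanh, requantize with the output
// scale/zero point, and clamp to the uint8 range.
void buildTanhTable(float in_scale, int32_t in_zero, float out_scale, int32_t out_zero,
                    uint8_t table[256])
{
  for (int32_t val = 0; val <= 255; ++val)
  {
    const float x = in_scale * (val - in_zero);
    const int32_t q = static_cast<int32_t>(std::round(std::tanh(x) / out_scale)) + out_zero;
    table[val] = static_cast<uint8_t>(std::max(0, std::min(255, q)));
  }
}
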
diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc b/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc
deleted file mode 100644
index ae8084518..000000000
--- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ZerosLikeLayer.h"
-
-#include "OperationUtils.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace cpu
-{
-namespace ops
-{
-ZerosLikeLayer::ZerosLikeLayer() : _input(nullptr), _output(nullptr)
-{
- // DO NOTHING
-}
-
-void ZerosLikeLayer::configure(const IPortableTensor *input, IPortableTensor *output)
-{
- _input = input;
- _output = output;
-}
-
-void ZerosLikeLayer::run()
-{
- if (!HaveSameShapes(_input, _output))
- throw std::runtime_error{"ZerosLike: input and output shape don't match."};
-
- auto element_size = getTensorShape(_input).FlatSize();
-
- switch (_input->data_type())
- {
- case OperandType::FLOAT32:
- memset(reinterpret_cast<float *>(_output->buffer()), 0, element_size * sizeof(float));
- break;
- case OperandType::INT32:
- memset(reinterpret_cast<int32_t *>(_output->buffer()), 0, element_size * sizeof(int32_t));
- break;
- default:
- throw std::runtime_error{"ZerosLike: unsupported data type"};
- }
-}
-
-} // namespace ops
-} // namespace cpu
-} // namespace backend
-} // namespace onert