author | Chunseok Lee <chunseok.lee@samsung.com> | 2020-09-05 21:49:46 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-09-05 21:49:46 +0900 |
commit | 74476a2d0296bdad70a2f7f90bc7419a8b05bffd (patch) | |
tree | 3f991636c1e9423d38eb16a384c20b569b0d678e /runtime/onert | |
parent | 042b262b3633b6c0f577aed6cb4b980ad0c1dcf3 (diff) | |
Imported Upstream version 1.9.0 (tags: upstream/1.9.0, submit/tizen/20200905.125700, accepted/tizen/unified/20200906.032650)
Diffstat (limited to 'runtime/onert')
274 files changed, 5864 insertions, 11867 deletions
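Before the raw diff, a quick orientation on the user-visible API changes in this import: nnfw.h gains a new NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE error (returned by nnfw_run when an output buffer turns out to be too small), and nnfw_experimental.h gains name-based I/O index lookup via nnfw_input_tensorindex / nnfw_output_tensorindex. A minimal caller sketch against the new API follows; the tensor name "input0", the buffer sizes, and the helper name are illustrative assumptions, not part of the commit:

```cpp
#include <cstdio>
#include <vector>
#include "nnfw.h"
#include "nnfw_experimental.h"

// Hypothetical helper: resolve an input by name (new in 1.9.0), run once, and
// surface the new insufficient-output-buffer status. Assumes `session` was
// already created, loaded, and prepared by the caller.
NNFW_STATUS run_by_name(nnfw_session *session)
{
  uint32_t in_index = 0;
  // New experimental API: name -> I/O index. On failure `in_index` is unchanged.
  if (nnfw_input_tensorindex(session, "input0", &in_index) != NNFW_STATUS_NO_ERROR)
    return NNFW_STATUS_ERROR; // no input tensor named "input0"

  std::vector<float> in(128, 0.0f), out(128, 0.0f); // illustrative sizes
  nnfw_set_input(session, in_index, NNFW_TYPE_TENSOR_FLOAT32, in.data(),
                 in.size() * sizeof(float));
  nnfw_set_output(session, 0, NNFW_TYPE_TENSOR_FLOAT32, out.data(),
                  out.size() * sizeof(float));

  NNFW_STATUS status = nnfw_run(session);
  if (status == NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE) // new in 1.9.0
    std::fprintf(stderr, "output buffer too small; re-query tensorinfo and retry\n");
  return status;
}
```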
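The nnfw_version.h hunk below bumps NNFW_VERSION from 0x01000800 to 0x01000900. Per the packing comment kept in that header (0xMMmmmmPP: MM = major, mmmm = minor, PP = patch), that is exactly the 1.8.0 to 1.9.0 step; a tiny decode sketch:

```cpp
#include <cstdint>
#include <cstdio>

// Decode the 0xMMmmmmPP packing from nnfw_version.h.
int main()
{
  const uint32_t v = 0x01000900; // NNFW_VERSION after this commit
  std::printf("%u.%u.%u\n", v >> 24, (v >> 8) & 0xFFFFu, v & 0xFFu);
  // prints: 1.9.0
  return 0;
}
```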
diff --git a/runtime/onert/api/include/nnfw.h b/runtime/onert/api/include/nnfw.h index ef3678b0d..9348df6ae 100644 --- a/runtime/onert/api/include/nnfw.h +++ b/runtime/onert/api/include/nnfw.h @@ -103,6 +103,8 @@ typedef enum { NNFW_STATUS_INVALID_STATE = 3, /** When it is out of memory */ NNFW_STATUS_OUT_OF_MEMORY = 4, + /** When the given output buffer is not large enough */ + NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE = 5, } NNFW_STATUS; /**
diff --git a/runtime/onert/api/include/nnfw_experimental.h b/runtime/onert/api/include/nnfw_experimental.h index 4cd5c585a..94f781988 100644 --- a/runtime/onert/api/include/nnfw_experimental.h +++ b/runtime/onert/api/include/nnfw_experimental.h @@ -62,4 +62,38 @@ typedef struct NNFW_STATUS nnfw_register_custom_op_info(nnfw_session *session, const char *id, custom_kernel_registration_info *info); +/** + * @brief Get the input tensor index by name + * + * This function finds an input tensor of the given name. + * If found, the index value is set to the address that @c index points to, and returns + * @c NNFW_STATUS_NO_ERROR. Otherwise, @c index is unchanged and returns @c NNFW_STATUS_ERROR . + * + * @note If two or more input tensors have the same name, the one with the lowest index is always + * returned. + * + * @param[in] session the session object + * @param[in] tensorname the name of the tensor to find, a null-terminated char pointer string + * @param[out] index the index to be set + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index); + +/** + * @brief Get the output tensor index by name + * + * This function finds an output tensor of the given name. + * If found, the index value is set to the address that @c index points to, and returns + * @c NNFW_STATUS_NO_ERROR. Otherwise, @c index is unchanged and returns @c NNFW_STATUS_ERROR . + * + * @note If two or more output tensors have the same name, the one with the lowest index is always + * returned. + * + * @param[in] session the session object + * @param[in] tensorname the name of the tensor to find, a null-terminated char pointer string + * @param[out] index the index to be set + * @return @c NNFW_STATUS_NO_ERROR if successful + */ +NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index); + #endif // __NNFW_EXPERIMENTAL_H__
diff --git a/runtime/onert/api/include/nnfw_version.h b/runtime/onert/api/include/nnfw_version.h index 320271a26..42e43760b 100644 --- a/runtime/onert/api/include/nnfw_version.h +++ b/runtime/onert/api/include/nnfw_version.h @@ -21,6 +21,6 @@ * NNFW_VERSION is a uint32 value representing nnfw runtime version * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch */ -#define NNFW_VERSION 0x01000800 +#define NNFW_VERSION 0x01000900 #endif // __NNFW_VERSION_H__
diff --git a/runtime/onert/api/src/nnfw_api.cc b/runtime/onert/api/src/nnfw_api.cc index d65158fd8..ff5e679da 100644 --- a/runtime/onert/api/src/nnfw_api.cc +++ b/runtime/onert/api/src/nnfw_api.cc @@ -33,6 +33,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1); STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_UNEXPECTED_NULL, 2); STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INVALID_STATE, 3); STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_OUT_OF_MEMORY, 4); +STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE, 5); STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_NONE, 0); STATIC_ASSERT_ENUM_CHECK(NNFW_LAYOUT_CHANNELS_LAST, 1); @@ -347,3 +348,15 @@ NNFW_STATUS nnfw_load_circle_from_buffer(nnfw_session *session, uint8_t *buffer, NNFW_RETURN_ERROR_IF_NULL(session); return session->load_circle_from_buffer(buffer, size); } + +NNFW_STATUS nnfw_input_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->input_tensorindex(tensorname, index); +} + +NNFW_STATUS nnfw_output_tensorindex(nnfw_session *session, const char *tensorname, uint32_t *index) +{ + NNFW_RETURN_ERROR_IF_NULL(session); + return session->output_tensorindex(tensorname, index); +}
diff --git a/runtime/onert/api/src/nnfw_api_internal.cc b/runtime/onert/api/src/nnfw_api_internal.cc index eb0b743d3..81b40703f 100644 --- a/runtime/onert/api/src/nnfw_api_internal.cc +++ b/runtime/onert/api/src/nnfw_api_internal.cc @@ -18,6 +18,7 @@ #include "CustomKernelRegistry.h" #include "compiler/Compiler.h" #include "util/ConfigSource.h" +#include "util/Exceptions.h" #include "exec/Execution.h" #include "circle_loader.h" #include "tflite_loader.h" @@ -37,6 +38,7 @@ #define MAX_BACKEND_NAME_LENGTH 32 #define MAX_OP_NAME_LENGTH 64 #define MAX_PATH_LENGTH 1024 +#define MAX_TENSOR_NAME_LENGTH 64 // Is null-terminating in length ? static bool null_terminating(const char *str, uint32_t length) @@ -64,6 +66,32 @@ static onert::ir::Layout convertLayout(NNFW_LAYOUT layout) return onert::ir::Layout::UNKNOWN; } +NNFW_STATUS getTensorIndexImpl(const onert::ir::Graph &graph, const char *tensorname, + uint32_t *index, bool is_input) +{ + if (!tensorname || !index) + return NNFW_STATUS_UNEXPECTED_NULL; + + if (!null_terminating(tensorname, MAX_TENSOR_NAME_LENGTH)) + { + std::cerr << "tensor name is too long" << std::endl; + return NNFW_STATUS_ERROR; + } + + auto ind_found = is_input ? graph.getInputIndex(tensorname) : graph.getOutputIndex(tensorname); + + if (ind_found.undefined()) + { + // Not found + return NNFW_STATUS_ERROR; + } + else + { + *index = ind_found.value(); + return NNFW_STATUS_NO_ERROR; + } +} + nnfw_session::nnfw_session() : _subgraphs{nullptr}, _execution{nullptr}, _kernel_registry{std::make_shared<onert::frontend::custom::KernelRegistry>()} @@ -213,6 +241,12 @@ NNFW_STATUS nnfw_session::run() { _execution->execute(); } + catch (const onert::InsufficientBufferSizeException &e) + { + // Currently insufficient buffer always means output buffer. + std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl; + return NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE; + } catch (const std::exception &e) { std::cerr << "Error during nnfw_session::run : " << e.what() << std::endl; @@ -447,26 +481,27 @@ NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti) } } + auto ind = primary_subgraph()->getInputs().at(index); + auto &input = primary_subgraph()->operands().at(ind); + + onert::ir::Shape new_shape(ti.rank); + for (int32_t i = 0; i < ti.rank; i++) + new_shape.dim(i) = ti.dims[i]; + + // if the passed shape is the same as the model's shape, do nothing + if (input.info().shape() == new_shape) + return NNFW_STATUS_NO_ERROR; + if (!isStatePreparedOrFinishedRun()) { // In this case, if we apply input shape in primary_subgraph, it will propagate after // compilation and execution - auto ind = primary_subgraph()->getInputs().at(index); - auto &input = primary_subgraph()->operands().at(ind); - - onert::ir::Shape new_shape(ti.rank); - for (int32_t i = 0; i < ti.rank; i++) - new_shape.dim(i) = ti.dims[i]; // overwrite input shape with the shape from ti input.info().shape(new_shape); } else // when called after nnfw_session::prepare() { - onert::ir::Shape new_shape(ti.rank); - for (int32_t i = 0; i < ti.rank; i++) - new_shape.dim(i) = ti.dims[i]; - _execution->changeInputShape(onert::ir::IOIndex(index), new_shape); } @@ -840,3 +875,13 @@ bool nnfw_session::isStatePreparedOrFinishedRun() { return isStatePrepared() || isStateFinishedRun(); } + +NNFW_STATUS nnfw_session::input_tensorindex(const char *tensorname, uint32_t *index) +{ + return getTensorIndexImpl(*primary_subgraph(), tensorname, index, true); +} + +NNFW_STATUS nnfw_session::output_tensorindex(const char *tensorname, uint32_t *index) +{ + return getTensorIndexImpl(*primary_subgraph(), tensorname, index, false); +}
diff --git a/runtime/onert/api/src/nnfw_api_internal.h b/runtime/onert/api/src/nnfw_api_internal.h index 1c3c3706f..604ba38b4 100644 --- a/runtime/onert/api/src/nnfw_api_internal.h +++ b/runtime/onert/api/src/nnfw_api_internal.h @@ -122,8 +122,6 @@ public: NNFW_STATUS input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti); NNFW_STATUS output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti); - NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func); - NNFW_STATUS set_available_backends(const char *backends); NNFW_STATUS set_op_backend(const char *op, const char *backend); @@ -133,9 +131,16 @@ public: NNFW_STATUS set_config(const char *key, const char *value); NNFW_STATUS get_config(const char *key, char *value, size_t value_size); - NNFW_STATUS load_circle_from_buffer(uint8_t *buffer, size_t size); + // + // Experimental API + // + + NNFW_STATUS register_custom_operation(const std::string &id, nnfw_custom_eval eval_func); + NNFW_STATUS input_tensorindex(const char *tensorname, uint32_t *index); + NNFW_STATUS output_tensorindex(const char *tensorname,
uint32_t *index); + private: onert::ir::Graph *primary_subgraph(); bool isStateInitialized(); diff --git a/runtime/onert/backend/acl_cl/Backend.h b/runtime/onert/backend/acl_cl/Backend.h index 8aaf516cd..5c5041378 100644 --- a/runtime/onert/backend/acl_cl/Backend.h +++ b/runtime/onert/backend/acl_cl/Backend.h @@ -25,6 +25,7 @@ #include "KernelGenerator.h" #include "TensorManager.h" #include "Optimizer.h" +#include "AclTensorRegistry.h" namespace onert { @@ -47,10 +48,13 @@ public: const auto &operands = graph.operands(); const auto &operations = graph.operations(); auto context = std::make_unique<BackendContext>(this, &graph); - auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor)); + auto tm = createTensorManager(is_linear_executor); + auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); + auto tb = std::make_shared<TensorBuilder>(operands, tm, tr); + context->tensor_registry = tr; context->tensor_builder = tb; - context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb); - context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb); + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr); context->tensor_register = nullptr; context->optimizer = std::make_shared<Optimizer>(context.get()); return context; diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc index d7f5f8031..31f1c10eb 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc @@ -24,78 +24,17 @@ namespace acl_cl { ConstantInitializer::ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<TensorBuilder> &tensor_builder) - : IConstantInitializer{operands}, _tensor_builder{tensor_builder} + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : acl_common::AclConstantInitializer{operands, tensor_reg} { // DO NOTHING } -void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerCopyInitializer(input_index, input_obj); -} - -void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerPermuteInitializer(input_index, input_obj); -} - -void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) -{ - const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); - const auto &block_size_obj = _operands.at(block_size_index); - - if (block_size_obj.isConstant()) - { - _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { - assert(model_obj.data()); - const auto &shape = model_obj.shape(); - const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); - assert(model_obj.shape().rank() == 1); - obj.access([&](ITensor &tensor) { - for (size_t i = 0; i < shape.num_elements(); ++i) - { - const int32_t value = base[shape.num_elements() - i - 1]; - int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + - tensor.calcOffset({static_cast<int32_t>(i)})); - *into = value; - } - 
}); - }; - } -} - -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - permuteInputInitialize(node, ir::operation::Conv2D::KERNEL); - copyInputInitialize(node, ir::operation::Conv2D::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL); - copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS); -} - void ConstantInitializer::visit(const ir::operation::EmbeddingLookup &node) { copyInputInitialize(node, ir::operation::EmbeddingLookup::LOOKUPS); } -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT); - copyInputInitialize(node, ir::operation::FullyConnected::BIAS); -} - void ConstantInitializer::visit(const ir::operation::Gather &node) { copyInputInitialize(node, ir::operation::Gather::INDICES); @@ -107,33 +46,6 @@ void ConstantInitializer::visit(const ir::operation::HashtableLookup &node) copyInputInitialize(node, ir::operation::HashtableLookup::KEYS); } -void ConstantInitializer::visit(const ir::operation::LSTM &node) -{ - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS); -} - -void ConstantInitializer::visit(const ir::operation::RNN &node) -{ - copyInputInitialize(node, ir::operation::RNN::WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::BIAS); -} - void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) { const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE); @@ -184,13 +96,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) } } -void ConstantInitializer::visit(const ir::operation::TransposeConv &node) -{ - const auto &kernel_index = node.getInputs().at(ir::operation::TransposeConv::KERNEL); - const auto &kernel_obj = _operands.at(kernel_index); - registerPermuteInitializer(kernel_index, kernel_obj); -} - } // namespace acl_cl } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.h b/runtime/onert/backend/acl_cl/ConstantInitializer.h index c51f72b11..4f894fd31 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.h +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.h @@ -17,9 +17,7 @@ #ifndef __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ 
#define __ONERT_COMPILER_ACL_CL_CONSTANT_INITIALIZER_H__ -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> -#include "TensorBuilder.h" +#include "AclConstantInitializer.h" namespace onert { @@ -28,32 +26,18 @@ namespace backend namespace acl_cl { -class ConstantInitializer : public IConstantInitializer +class ConstantInitializer : public acl_common::AclConstantInitializer { public: ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<TensorBuilder> &tensor_builder); + const std::shared_ptr<ITensorRegistry> &tensor_reg); public: - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::EmbeddingLookup &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::HashtableLookup &) override; - void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::SpaceToBatchND &) override; - void visit(const ir::operation::TransposeConv &) override; - -private: - std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } - void copyInputInitialize(const ir::Operation &node, uint32_t index); - void permuteInputInitialize(const ir::Operation &node, uint32_t index); - -private: - std::shared_ptr<TensorBuilder> _tensor_builder; + using acl_common::AclConstantInitializer::visit; + void visit(const ir::operation::EmbeddingLookup &) final; + void visit(const ir::operation::Gather &) final; + void visit(const ir::operation::HashtableLookup &) final; + void visit(const ir::operation::SpaceToBatchND &) final; }; } // namespace acl_cl diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.cc b/runtime/onert/backend/acl_cl/KernelGenerator.cc index a84f983b4..94489253d 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.cc +++ b/runtime/onert/backend/acl_cl/KernelGenerator.cc @@ -40,15 +40,16 @@ namespace backend namespace acl_cl { -using ::onert::backend::acl_common::asAclClFunction; +using ::onert::backend::acl_common::asAclFunction; using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder< - ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>; + ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclFunction>; -KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx, - const ir::Operations &operations_ctx, - const std::shared_ptr<TensorBuilder> &tensor_builder) +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -77,51 +78,69 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto block_size_tensor = _tensor_builder->at(block_size_index).get(); + auto ofm_tensor = 
_tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get(); assert(_ctx.at(block_size_index).data()); - auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>(); + auto fn = acl_common::generateLayer<arm_compute::CLBatchToSpaceLayer>( + ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); - fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Cast &node) +void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; + const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + const auto activation = node.param().activation; - std::unique_ptr<::arm_compute::IFunction> fn; - if (ifm_tensor->data_type() == ofm_tensor->data_type()) - { - auto l = std::make_unique<::arm_compute::CLCopy>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - l->configure(ifm_tensor->handle(), ofm_tensor->handle()); + const auto act_info = acl_common::asActivationLayerInfo(activation); - fn = std::move(l); - } - else + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().arithmetic_type) { - auto l = std::make_unique<::arm_compute::CLCast>(); - - // TODO Support converting float to int32 as round down - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); - - fn = std::move(l); + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + { + fn = acl_common::generateLayer<arm_compute::CLArithmeticAddition>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE, act_info); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + { + fn = acl_common::generateLayer<arm_compute::CLArithmeticSubtraction>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE, act_info); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + { + fn = acl_common::generateLayer<arm_compute::CLPixelWiseMultiplication>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN, + act_info); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::DIV: + { + fn = acl_common::generateLayer<arm_compute::CLArithmeticDivision>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), act_info); + break; + } + default: + assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations"); + break; } - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Conv2D &node) @@ -145,22 +164,20 @@ void 
KernelGenerator::visit(const ir::operation::Conv2D &node) ker_width, ker_height); const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); - auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(), - ::arm_compute::Size2D(1U, 1U), act_info); + auto fn = acl_common::generateLayer<arm_compute::CLConvolutionLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(), + ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info, + ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) @@ -185,50 +202,23 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto multiplier = node.param().multiplier; const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); { - auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, multiplier, act_info); + auto fn = acl_common::generateLayer<arm_compute::CLDepthwiseConvolutionLayer>( + ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), + conv_info, multiplier, act_info); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } } -void KernelGenerator::visit(const ir::operation::MaxPool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::AvgPool2D &node) -{ - 
auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - void KernelGenerator::visit(const ir::operation::Concat &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -250,70 +240,44 @@ void KernelGenerator::visit(const ir::operation::Concat &node) return; } - auto output_tensor = _tensor_builder->at(ofm_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get(); std::vector<::arm_compute::ICLTensor *> input_tensors; for (auto &ifm_ind : input_indexes) - input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); + input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle()); std::unique_ptr<::arm_compute::IFunction> fn; if (input_indexes.size() < 2) { - auto l = std::make_unique<::arm_compute::CLCopy>(); - l->configure(input_tensors.at(0), output_tensor->handle()); - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensors.at(0), + output_tensor->handle()); } else { - auto l = std::make_unique<::arm_compute::CLConcatenateLayer>(); const auto rank = _ctx.at(ofm_index).shape().rank(); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); - l->configure(input_tensors, output_tensor->handle(), fixed_axis); - fn = std::move(l); + fn = acl_common::generateLayer<::arm_compute::CLConcatenateLayer>( + input_tensors, output_tensor->handle(), fixed_axis); } - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::FullyConnected &node) { const auto output_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->at(output_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); const auto activation = node.param().activation; - auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor, + auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ICLTensor, ::arm_compute::CLFullyConnectedReshapingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); _return_fn = std::make_unique<exec::FunctionSequence>( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } -void KernelGenerator::visit(const ir::operation::Mul &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale 
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN); - - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - void KernelGenerator::visit(const ir::operation::Reduce &node) { const auto output_index{node.getOutputs().at(0)}; @@ -322,8 +286,8 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto keep_dims{node.param().keep_dims}; const auto reduce_type = node.param().reduce_type; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); // Convert to ACL axes taking into account negative values and possible duplicates. const auto &axes = _ctx.at(axes_index); @@ -334,29 +298,21 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) std::unique_ptr<arm_compute::IFunction> fn; if (reduce_type == ir::operation::Reduce::ReduceType::MEAN) { - auto l = std::make_unique<::arm_compute::CLReduceMean>(); - const auto acl_axes = acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout); - l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle()); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::CLReduceMean>(input_tensor->handle(), acl_axes, + keep_dims, output_tensor->handle()); } else { - auto l = std::make_unique<::arm_compute::CLReduceOperation>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout); - l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims, - acl_common::convertReduceType(reduce_type)); - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::CLReduceOperation>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + output_tensor->handle(), acl_axes, keep_dims, acl_common::convertReduceType(reduce_type)); } - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Reshape &node) @@ -364,8 +320,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); // NOTE This operation must not be changed the layout from frontend to backend // So, PermutationOperationPass makes layouts of frontend and backend the same. 
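The acl_cl KernelGenerator hunks above and below all apply the same two mechanical refactors: tensor lookups move from _tensor_builder->at(index) to the new _tensor_reg->getAclTensor(index) registry, and the repeated make_unique-then-configure boilerplate collapses into a single acl_common::generateLayer call. A self-contained sketch of what such a variadic factory looks like; the real helper in acl_common may differ (for instance, it also needs an overload taking ACL's internal memory manager for layers like CLSoftmaxLayer):

```cpp
#include <memory>
#include <utility>

// Stand-ins so the sketch compiles on its own; in onert these are
// arm_compute::IFunction and concrete layers such as arm_compute::CLCopy.
struct IFunction
{
  virtual ~IFunction() = default;
};

// Sketch of the generateLayer pattern: default-construct the layer, forward
// every argument to its configure(), and hand back the type-erased function.
template <typename Layer, typename... Args>
std::unique_ptr<IFunction> generateLayer(Args &&... args)
{
  auto l = std::make_unique<Layer>();
  l->configure(std::forward<Args>(args)...);
  return l;
}

struct DemoLayer : IFunction
{
  void configure(int /*param*/) {} // mimics an ACL layer's configure()
};

int main()
{
  // One expression replaces the old make_unique + configure + move dance.
  auto fn = generateLayer<DemoLayer>(42);
  (void)fn;
  return 0;
}
```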
@@ -376,13 +332,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) UNUSED_RELEASE(frontend_layout); UNUSED_RELEASE(backend_layout); - auto fn = std::make_unique<::arm_compute::CLReshapeLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(), + output_tensor->handle()); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Squeeze &node) @@ -398,32 +351,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node) (void)dims; (void)ndim; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - auto fn = std::make_unique<arm_compute::CLReshapeLayer>(); - fn->configure(input_tensor->handle(), output_tensor->handle()); - auto acl_fn = asAclClFunction(std::move(fn)); - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Tanh &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<arm_compute::CLActivationLayer>(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f}; - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); + auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(), + output_tensor->handle()); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Softmax &node) @@ -433,17 +365,14 @@ void KernelGenerator::visit(const ir::operation::Softmax &node) const auto beta = node.param().beta; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(input_tensor->handle(), output_tensor->handle(), beta); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLSoftmaxLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + output_tensor->handle(), beta); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Slice &node) @@ -453,8 +382,8 @@ void KernelGenerator::visit(const ir::operation::Slice &node) const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; - auto outputData_tensor = _tensor_builder->at(output_index).get(); - auto inputData_tensor = _tensor_builder->at(input_index).get(); + auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto 
inputData_tensor = _tensor_reg->getAclTensor(input_index).get(); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = inputData_tensor->layout(); @@ -506,13 +435,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node) ends_set.set(i, ends[i]); } - auto fn = std::make_unique<::arm_compute::CLSlice>(); - - fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLSlice>( + inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::StridedSlice &node) @@ -523,8 +449,8 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - auto outputData_tensor = _tensor_builder->at(output_index).get(); - auto inputData_tensor = _tensor_builder->at(input_index).get(); + auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get(); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = inputData_tensor->layout(); @@ -597,14 +523,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) strides_set.set(i, strides[i]); } - auto fn = std::make_unique<::arm_compute::CLStridedSlice>(); - - fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, - strides_set, begin_mask, end_mask, shrink_axis_mask); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLStridedSlice>( + inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set, + begin_mask, end_mask, shrink_axis_mask); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Transpose &node) @@ -615,8 +538,8 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) const auto rank = _ctx.at(ifm_idx).shape().rank(); - auto ofm_tensor = _tensor_builder->at(ofm_idx).get(); - auto ifm_tensor = _tensor_builder->at(ifm_idx).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get(); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = ifm_tensor->layout(); @@ -625,93 +548,168 @@ void KernelGenerator::visit(const ir::operation::Transpose &node) auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector( rank, pv, frontend_layout, backend_layout); - auto fn = std::make_unique<::arm_compute::CLPermute>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Add &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = 
_tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), - arm_compute::ConvertPolicy::SATURATE); + auto fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(), + ofm_tensor->handle(), backend_pv); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Sub &node) +void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; - - const auto activation = node.param().activation; + const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>(); + const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo( + node.param().op_type, node.param().alpha, node.param().beta); - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), - arm_compute::ConvertPolicy::SATURATE); + auto fn = acl_common::generateLayer<arm_compute::CLActivationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), act_info); - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Div &node) +void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)}; - auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().op_type) + { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND: + { + fn = acl_common::generateLayer<arm_compute::CLBinaryLogicalOp>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle(), + arm_compute::BinaryLogicalOperation::AND); + break; + } + case 
ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: + { + fn = acl_common::generateLayer<arm_compute::CLBitwiseOr>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: + { + fn = acl_common::generateLayer<arm_compute::CLElementwiseMax>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN: + { + fn = acl_common::generateLayer<arm_compute::CLElementwiseMin>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + default: + { + std::string err_msg("acl_cl KernelGenerator : " + node.name() + + " is not an elementwise-binary operation"); + assert(false && err_msg.c_str()); + break; + } + } - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); }
-void KernelGenerator::visit(const ir::operation::Exp &node) +void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto fn = std::make_unique<::arm_compute::CLExpLayer>(); + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().op_type) + { + case ir::operation::ElementwiseUnary::Type::ABS: + { + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - fn->configure(input_tensor->handle(), output_tensor->handle()); + fn = acl_common::generateLayer<arm_compute::CLActivationLayer>( + input_tensor->handle(), output_tensor->handle(), act_info); + break; + } + case ir::operation::ElementwiseUnary::Type::CAST: + { + if (input_tensor->data_type() == output_tensor->data_type()) + { + fn = acl_common::generateLayer<arm_compute::CLCopy>(input_tensor->handle(), + output_tensor->handle()); + } + else + { + // TODO Support converting float to int32 as round down + fn = acl_common::generateLayer<arm_compute::CLCast>( + input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); + } + break; + } + case ir::operation::ElementwiseUnary::Type::DEQUANTIZE: + { + fn = acl_common::generateLayer<arm_compute::CLDequantizationLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::EXP: + { + fn = acl_common::generateLayer<arm_compute::CLExpLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::FLOOR: + { + fn = acl_common::generateLayer<arm_compute::CLFloor>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT: + { + fn = acl_common::generateLayer<arm_compute::CLBitwiseNot>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::NEG: + { + fn = acl_common::generateLayer<arm_compute::CLNeg>(input_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::RSQRT: + { + fn = acl_common::generateLayer<arm_compute::CLRsqrtLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::SQRT: + { + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; + fn = acl_common::generateLayer<arm_compute::CLActivationLayer>( + input_tensor->handle(), output_tensor->handle(), act_info); + break; + } + default: + { + throw std::runtime_error("acl_cl KernelGenerator : " + node.name() + " is not supported yet"); + break; + } + } + + auto acl_fn = asAclFunction(std::move(fn)); _return_fn = std::move(acl_fn); }
@@ -721,16 +719,13 @@ void KernelGenerator::visit(const ir::operation::ExpandDims &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLReshapeLayer>(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - fn->configure(input_tensor->handle(), output_tensor->handle()); + auto fn = acl_common::generateLayer<arm_compute::CLReshapeLayer>(input_tensor->handle(), + output_tensor->handle()); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); }
void KernelGenerator::visit(const ir::operation::InstanceNorm &node) @@ -740,67 +735,25 @@ const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)}; const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto gamma_tensor = _tensor_builder->at(gamma_index).get(); - auto beta_tensor = _tensor_builder->at(beta_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get(); + auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get(); auto epsilon = node.param().epsilon; auto activation = node.param().activation; - auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), - beta_tensor->handle(), epsilon); + auto fn = acl_common::generateLayer<arm_compute::CLInstanceNormalizationLayerEx>( + ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(), + epsilon); _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::Logistic &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ -
::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalAnd &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), - ::arm_compute::BinaryLogicalOperation::AND); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::LSTM &node) { - _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor, - ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder); + _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ICLTensor, + ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_reg); } void KernelGenerator::visit(const ir::operation::Comparison &node) @@ -811,18 +764,15 @@ void KernelGenerator::visit(const ir::operation::Comparison &node) const auto comparison_type = node.param().comparison_type; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::CLComparison>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), - (arm_compute::ComparisonOperation)comparison_type); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get(); + auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLComparison>( + input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(), + (arm_compute::ComparisonOperation)comparison_type); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Pack &node) @@ -836,26 +786,24 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : node.getInputs()) input_indexes.emplace_back(input_index); - auto output = _tensor_builder->at(output_index).get()->handle(); + auto output = _tensor_reg->getAclTensor(output_index).get()->handle(); std::vector<arm_compute::ICLTensor *> inputs; for (const auto &input_index : input_indexes) - inputs.emplace_back(_tensor_builder->at(input_index)->handle()); + inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(output_index).get()->layout(); + const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout(); if (axis < 0) axis 
+= output_rank; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLStackLayer>(); - // Disable applied dim_correction std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes; for (const auto &input_index : input_indexes) { size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_tensor = _tensor_builder->at(input_index); + const auto &input_tensor = _tensor_reg->getAclTensor(input_index); orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape()); assert(input_rank == input_tensor->num_dimensions()); if (input_rank != input_tensor->info()->num_dimensions()) @@ -866,7 +814,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) } } - fn->configure(inputs, axis, output); + auto fn = acl_common::generateLayer<arm_compute::CLStackLayer>(inputs, axis, output); // Revert disabling applied dim_correction assert(inputs.size() == orig_inputs_acl_tensor_shapes.size()); @@ -875,7 +823,21 @@ void KernelGenerator::visit(const ir::operation::Pack &node) inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i)); } - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); +} + +void KernelGenerator::visit(const ir::operation::Pool2D &node) +{ + auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( + node, _ctx, _tensor_reg, _current_op_seq_layout, + acl_common::convertPoolType(node.param().op_type)); + + const auto ofm_index{node.getOutputs().at(0)}; + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + const auto activation = node.param().activation; + _return_fn = std::make_unique<exec::FunctionSequence>( + asAclFunction(std::move(raw_fn)), + ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Permute &node) @@ -883,8 +845,8 @@ void KernelGenerator::visit(const ir::operation::Permute &node) const auto ofm_idx{node.getOutputs().at(0)}; const auto ifm_idx{node.getInputs().at(0)}; const auto permute_type = node.getPermuteType(); - auto ofm_tensor = _tensor_builder->at(ofm_idx).get(); - auto ifm_tensor = _tensor_builder->at(ifm_idx).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get(); const auto rank = _ctx.at(ofm_idx).shape().rank(); assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank()); @@ -895,70 +857,23 @@ void KernelGenerator::visit(const ir::operation::Permute &node) // WHCN -> CWHN pv = arm_compute::PermutationVector{2, 0, 1}; - auto l = std::make_unique<::arm_compute::CLPermute>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::CLPermute>(ifm_tensor->handle(), + ofm_tensor->handle(), pv); } else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4) { // CWHN -> WHCN pv = arm_compute::PermutationVector{1, 2, 0}; - auto l = std::make_unique<::arm_compute::CLPermute>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv); - - fn = std::move(l); + fn = acl_common::generateLayer<::arm_compute::CLPermute>(ifm_tensor->handle(), + ofm_tensor->handle(), pv); } else { - auto l = std::make_unique<::arm_compute::CLCopy>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::CLCopy>(ifm_tensor->handle(), 
ofm_tensor->handle()); } - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::RSQRT &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - _return_fn = asAclClFunction(std::move(fn)); -} - -void KernelGenerator::visit(const ir::operation::ReLU &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<arm_compute::CLActivationLayer>(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) @@ -967,58 +882,32 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - auto fn = std::make_unique<::arm_compute::CLScale>(); + auto fn = acl_common::generateLayer<arm_compute::CLScale>( + ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR, + ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f), + ::arm_compute::SamplingPolicy::TOP_LEFT); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), - ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE, - ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU1 &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::ReLU6 &node) +void KernelGenerator::visit(const ir::operation::ResizeNearestNeighbor &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = 
_tensor_builder->at(ifm_index).get(); - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f}; + const auto ifm_index{node.getInputs().at(ir::operation::ResizeNearestNeighbor::Input::INPUT)}; - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); + auto fn = acl_common::generateLayer<arm_compute::CLScale>( + ifm_tensor->handle(), ofm_tensor->handle(), + ::arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, ::arm_compute::BorderMode::REPLICATE, + ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::RNN &node) @@ -1036,43 +925,25 @@ void KernelGenerator::visit(const ir::operation::RNN &node) const auto activation = node.param().activation; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - auto weights_tensor = _tensor_builder->at(weights_index).get(); - auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); - auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); + auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get(); + auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); + auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get(); auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); - auto copy_layer = std::make_unique<::arm_compute::CLCopy>(); - copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle()); - _return_fn = asAclClFunction(std::move(copy_layer)); + auto copy_layer = acl_common::generateLayer<arm_compute::CLCopy>( + hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle()); + _return_fn = asAclFunction(std::move(copy_layer)); - auto fn = std::make_unique<::arm_compute::CLRNNLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - fn->configure(input_tensor->handle(), weights_tensor->handle(), - recurrent_weights_tensor->handle(), bias_tensor->handle(), - hidden_state_out_tensor->handle(), output_tensor->handle(), act_info); - _return_fn = asAclClFunction(std::move(fn)); -} - -void KernelGenerator::visit(const ir::operation::Floor &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLFloor>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = 
std::move(acl_fn); + auto fn = acl_common::generateLayer<arm_compute::CLRNNLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(), + hidden_state_out_tensor->handle(), output_tensor->handle(), act_info); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) @@ -1083,24 +954,19 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)}; const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto block_size_tensor = _tensor_builder->at(block_size_index).get(); - auto paddings_tensor = _tensor_builder->at(paddings_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get(); + auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get(); assert(_ctx.at(block_size_index).data()); assert(_ctx.at(paddings_index).data()); - std::unique_ptr<::arm_compute::IFunction> fn; - - auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>(); - l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(), - ofm_tensor->handle()); - fn = std::move(l); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLSpaceToBatchLayer>( + ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(), + ofm_tensor->handle()); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) @@ -1110,29 +976,13 @@ void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) auto block_size = node.param().block_size; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); -void KernelGenerator::visit(const ir::operation::L2Pool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2); + auto fn = acl_common::generateLayer<arm_compute::CLSpaceToDepthLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), block_size); - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclClFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) @@ -1141,17 +991,14 @@ void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) const auto 
lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto lookups_tensor = _tensor_builder->at(lookups_index).get(); - auto values_tensor = _tensor_builder->at(values_index).get(); - - auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>(); - - fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(values_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLEmbeddingLookup>( + values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::L2Normalization &node) @@ -1173,19 +1020,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) float bias = 0.0f; // Don't offset the reduction. - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, radius, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::HashtableLookup &node) @@ -1197,21 +1041,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node) const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto hits_tensor = _tensor_builder->at(hits_index).get(); - - auto lookups_tensor = _tensor_builder->at(lookups_index).get(); - auto keys_tensor = _tensor_builder->at(keys_index).get(); - auto values_tensor = _tensor_builder->at(values_index).get(); - - auto fn = std::make_unique<::arm_compute::CLHashtableLookup>(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get(); - fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), - output_tensor->handle(), hits_tensor->handle()); + auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get(); + auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(values_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLHashtableLookup>( + lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), + output_tensor->handle(), hits_tensor->handle()); 
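// Illustrative aside, not lines from the upstream patch: every visitor in this
// file is migrated with the same two substitutions seen above. Tensor lookups
// move from the builder to the new ACL tensor registry:
//   auto t = _tensor_builder->at(index).get();          // before
//   auto t = _tensor_reg->getAclTensor(index).get();    // after
// and, because AclClFunction is deleted from AclFunction.h later in this patch,
// the wrapper handed back to the executor is renamed:
//   _return_fn = asAclClFunction(std::move(fn));        // before
//   _return_fn = asAclFunction(std::move(fn));          // after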
- _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::PReLU &node) @@ -1220,17 +1061,14 @@ void KernelGenerator::visit(const ir::operation::PReLU &node) const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)}; const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto alpha_tensor = _tensor_builder->at(alpha_index).get(); - - auto fn = std::make_unique<::arm_compute::CLPReluLayer>(); - - fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLPReluLayer>( + ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle()); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::TransposeConv &node) @@ -1258,77 +1096,18 @@ void KernelGenerator::visit(const ir::operation::TransposeConv &node) invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1); } - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); const auto tconv_info = acl_common::asPadStrideInfo(padding, stride); - auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), - tconv_info, invalid_horizontal, invalid_vertical); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::SQRT &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::CLBitwiseOr>(); - - fn->configure(input0_tensor->handle(), 
input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalNot &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLBitwiseNot>(); + auto fn = acl_common::generateLayer<arm_compute::CLTransposeConvLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(), + ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info, invalid_horizontal, + invalid_vertical); - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::SquaredDifference &node) @@ -1337,17 +1116,14 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>(); + auto fn = acl_common::generateLayer<arm_compute::CLElementwiseSquaredDiff>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::TopKV2 &node) @@ -1364,17 +1140,14 @@ void KernelGenerator::visit(const ir::operation::TopKV2 &node) const auto k = node.param().k; - auto values_tensor = _tensor_builder->at(outputValues_index).get(); - auto indices_tensor = _tensor_builder->at(outputIndices_index).get(); - auto input_tensor = _tensor_builder->at(inputData_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(outputValues_index).get(); + auto indices_tensor = _tensor_reg->getAclTensor(outputIndices_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(inputData_index).get(); - auto fn = std::make_unique<::arm_compute::CLTopKV2>(); + auto fn = acl_common::generateLayer<arm_compute::CLTopKV2>( + input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle()); - fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Gather &node) @@ -1389,9 +1162,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) const auto axis_value = (axis_raw < 0 ? 
(ifm_rank + axis_raw) : axis_raw); const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto indices_tensor = _tensor_builder->at(indices_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get(); // NOTE The frontend layout and backend layout must be the same for this operation. // If not the same, we have to add a stage(?) to perform permutation of output tensor. It @@ -1407,8 +1180,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); - auto fn = std::make_unique<::arm_compute::CLGatherEx>(); - // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; assert(n == ifm_tensor->num_dimensions()); @@ -1433,52 +1204,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node) acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false)); } - fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); + auto fn = acl_common::generateLayer<arm_compute::CLGatherEx>( + ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); // Revert disabling applied dim_correction ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape); indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLNeg>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - - auto fn = std::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ArgMax &node) @@ -1491,8 +1224,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) assert((ifm_shape.rank() - 1) == ofm_shape.rank()); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); auto frontend_layout = _current_op_seq_layout; auto 
backend_layout = ifm_tensor->layout(); @@ -1506,31 +1239,11 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) auto acl_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>(); + auto fn = acl_common::generateLayer<arm_compute::CLArgMinMaxLayer>( + ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), + ::arm_compute::ReductionOperation::ARG_IDX_MAX); - fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(), - ::arm_compute::ReductionOperation::ARG_IDX_MAX); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Dequantize &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) @@ -1544,19 +1257,16 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLNormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::DepthToSpace &node) @@ -1567,16 +1277,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto acl_fn = asAclClFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::CLDepthToSpaceLayer>( + input_tensor->handle(), output_tensor->handle(), block_size); - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Split &node) @@ -1590,10 +1297,10 @@ void KernelGenerator::visit(const ir::operation::Split &node) for (const auto &output : node.getOutputs()) output_indexes.emplace_back(output); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto 
ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); std::vector<arm_compute::ICLTensor *> output_tensors; for (const auto &ofm_ind : output_indexes) - output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle()); + output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle()); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = ifm_tensor->layout(); @@ -1602,11 +1309,10 @@ void KernelGenerator::visit(const ir::operation::Split &node) axis += ifm_rank; axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::CLSplit>(); - - fn->configure(ifm_tensor->handle(), output_tensors, axis); + auto fn = + acl_common::generateLayer<arm_compute::CLSplit>(ifm_tensor->handle(), output_tensors, axis); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Unpack &node) @@ -1620,13 +1326,13 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : node.getOutputs()) output_indexes.emplace_back(output_index); - auto input = _tensor_builder->at(input_index).get()->handle(); + auto input = _tensor_reg->getAclTensor(input_index).get()->handle(); std::vector<arm_compute::ICLTensor *> outputs; for (const auto &output_index : output_indexes) - outputs.emplace_back(_tensor_builder->at(output_index)->handle()); + outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout(); if (axis < 0) axis += input_rank; axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value(); @@ -1636,7 +1342,7 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) for (const auto &output_index : output_indexes) { size_t output_rank = _ctx.at(output_index).shape().rank(); - const auto &output_tensor = _tensor_builder->at(output_index); + const auto &output_tensor = _tensor_reg->getAclTensor(output_index); orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape()); assert(output_rank == output_tensor->num_dimensions()); if (output_rank != output_tensor->info()->num_dimensions()) @@ -1647,11 +1353,9 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) } } - auto fn = std::make_unique<::arm_compute::CLUnstack>(); - - fn->configure(input, outputs, axis); + auto fn = acl_common::generateLayer<arm_compute::CLUnstack>(input, outputs, axis); - _return_fn = asAclClFunction(std::move(fn)); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Pad &node) @@ -1669,11 +1373,11 @@ void KernelGenerator::visit(const ir::operation::Pad &node) auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()); const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info); - auto input = _tensor_builder->at(input_index).get()->handle(); - auto output = _tensor_builder->at(output_index).get()->handle(); + auto input = _tensor_reg->getAclTensor(input_index).get()->handle(); + auto output = _tensor_reg->getAclTensor(output_index).get()->handle(); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(input_index).get()->layout(); + const auto backend_layout = 
_tensor_reg->getAclTensor(input_index).get()->layout(); ::arm_compute::PaddingList padding_list; padding_list.resize(rank); @@ -1685,11 +1389,10 @@ void KernelGenerator::visit(const ir::operation::Pad &node) acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value(); padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]}; } - auto fn = std::make_unique<::arm_compute::CLPadLayer>(); // Disable applied dim_correction size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_tensor = _tensor_builder->at(input_index); + const auto &input_tensor = _tensor_reg->getAclTensor(input_index); assert(input_rank == input_tensor->num_dimensions()); if (input_rank != input_tensor->info()->num_dimensions()) { @@ -1698,50 +1401,13 @@ _ctx.at(input_index).shape(), frontend_layout, backend_layout, false)); } - fn->configure(input, output, padding_list, pixel_value); + auto fn = + acl_common::generateLayer<arm_compute::CLPadLayer>(input, output, padding_list, pixel_value); // Do not revert disabling applied dim_correction; CLPadKernel has a CL kernel for 4-dimension tensors // It would produce a mismatch of results - _return_fn = asAclClFunction(std::move(fn)); -} - -void KernelGenerator::visit(const ir::operation::Min &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLElementwiseMin>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Max &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::CLElementwiseMax>(); - - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node) @@ -1749,17 +1415,13 @@ const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, - 0); + auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>( + ifm_tensor->handle(), ofm_tensor->handle(),
::arm_compute::ConvertPolicy::SATURATE, 0); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node) @@ -1767,17 +1429,13 @@ void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node) const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, - 0); + auto fn = acl_common::generateLayer<arm_compute::CLDepthConvertLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE, 0); - auto acl_fn = asAclClFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } } // namespace acl_cl diff --git a/runtime/onert/backend/acl_cl/KernelGenerator.h b/runtime/onert/backend/acl_cl/KernelGenerator.h index 1e3b06489..d188d6d83 100644 --- a/runtime/onert/backend/acl_cl/KernelGenerator.h +++ b/runtime/onert/backend/acl_cl/KernelGenerator.h @@ -21,6 +21,8 @@ #include "ir/Operands.h" #include "TensorBuilder.h" +#include "AclTensorRegistry.h" +#include "TensorManager.h" namespace onert { @@ -33,70 +35,52 @@ class KernelGenerator : public IKernelGenerator { public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, - const std::shared_ptr<TensorBuilder> &tensor_builder); + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg); void visit(const ir::OpSequence &) override; void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::MaxPool2D &) override; - void visit(const ir::operation::AvgPool2D &) override; void visit(const ir::operation::Concat &) override; void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::Mul &) override; void visit(const ir::operation::Reduce &) override; void visit(const ir::operation::Reshape &) override; void visit(const ir::operation::Squeeze &) override; - void visit(const ir::operation::Tanh &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::Slice &) override; void visit(const ir::operation::StridedSlice &) override; void visit(const ir::operation::Transpose &) override; - void visit(const ir::operation::Add &) override; - void visit(const ir::operation::Sub &) override; - void visit(const ir::operation::Cast &) override; - void visit(const ir::operation::Div &) override; - void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::ElementwiseActivation &) override; + void visit(const ir::operation::ElementwiseBinary &) override; + void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; void visit(const ir::operation::InstanceNorm &) override; - void visit(const 
ir::operation::Logistic &) override; void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::LogicalAnd &) override; void visit(const ir::operation::LSTM &) override; void visit(const ir::operation::Pack &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Permute &) override; - void visit(const ir::operation::RSQRT &) override; - void visit(const ir::operation::ReLU &) override; void visit(const ir::operation::ResizeBilinear &) override; - void visit(const ir::operation::ReLU1 &) override; - void visit(const ir::operation::ReLU6 &) override; + void visit(const ir::operation::ResizeNearestNeighbor &) override; void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::Floor &) override; void visit(const ir::operation::SpaceToBatchND &) override; void visit(const ir::operation::SpaceToDepth &) override; - void visit(const ir::operation::L2Pool2D &) override; void visit(const ir::operation::EmbeddingLookup &) override; void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::HashtableLookup &) override; void visit(const ir::operation::PReLU &) override; void visit(const ir::operation::TransposeConv &) override; - void visit(const ir::operation::SQRT &) override; - void visit(const ir::operation::LogicalOr &) override; - void visit(const ir::operation::LogicalNot &) override; void visit(const ir::operation::SquaredDifference &) override; void visit(const ir::operation::TopKV2 &) override; void visit(const ir::operation::Gather &) override; - void visit(const ir::operation::Neg &) override; - void visit(const ir::operation::Abs &) override; void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::Dequantize &) override; void visit(const ir::operation::LocalResponseNormalization &) override; void visit(const ir::operation::DepthToSpace &) override; void visit(const ir::operation::Split &) override; void visit(const ir::operation::Unpack &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Min &) override; - void visit(const ir::operation::Max &) override; void visit(const ir::operation::ConvertFp32ToFp16 &) override; void visit(const ir::operation::ConvertFp16ToFp32 &) override; @@ -104,6 +88,7 @@ private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg; ir::Layout _current_op_seq_layout; }; diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc index 6ba3143e8..9134d3fb8 100644 --- a/runtime/onert/backend/acl_cl/Optimizer.cc +++ b/runtime/onert/backend/acl_cl/Optimizer.cc @@ -19,7 +19,7 @@ #include "ParentInfo.h" #include <cassert> -#include <ir/LoweredGraph.h> +#include <compiler/LoweredGraph.h> #include <util/logging.h> #include "AclSubTensorAnalyzer.h" diff --git a/runtime/onert/backend/acl_cl/TensorManager.h b/runtime/onert/backend/acl_cl/TensorManager.h index bdbd0364e..ab295dbec 100644 --- a/runtime/onert/backend/acl_cl/TensorManager.h +++ b/runtime/onert/backend/acl_cl/TensorManager.h @@ -56,7 +56,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager< using TensorManager = acl_common::AclTensorManager<operand::ICLTensor, operand::CLTensor, operand::CLSubTensor>; -TensorManager *createTensorManager(bool is_linear_executor) +inline TensorManager *createTensorManager(bool 
is_linear_executor) { if (is_linear_executor) { diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.cc b/runtime/onert/backend/acl_common/AclConstantInitializer.cc new file mode 100644 index 000000000..6ad5b7b69 --- /dev/null +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "AclConstantInitializer.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +AclConstantInitializer::AclConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : IConstantInitializer{operands}, _tensor_reg{tensor_reg} +{ + // DO NOTHING +} + +void AclConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index) +{ + assert(node.getInputs().size() > index); + + const auto &input_index = node.getInputs().at(index); + const auto &input_obj = _operands.at(input_index); + registerCopyInitializer(input_index, input_obj); +} + +void AclConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index) +{ + assert(node.getInputs().size() > index); + + const auto &input_index = node.getInputs().at(index); + const auto &input_obj = _operands.at(input_index); + registerPermuteInitializer(input_index, input_obj); +} + +void AclConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) +{ + const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); + const auto &block_size_obj = _operands.at(block_size_index); + + if (block_size_obj.isConstant()) + { + _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { + assert(model_obj.data()); + const auto &shape = model_obj.shape(); + const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); + assert(model_obj.shape().rank() == 1); + obj.access([&](ITensor &tensor) { + for (size_t i = 0; i < shape.num_elements(); ++i) + { + const int32_t value = base[shape.num_elements() - i - 1]; + int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + + tensor.calcOffset({static_cast<int32_t>(i)})); + *into = value; + } + }); + }; + } +} + +void AclConstantInitializer::visit(const ir::operation::Conv2D &node) +{ + permuteInputInitialize(node, ir::operation::Conv2D::KERNEL); + copyInputInitialize(node, ir::operation::Conv2D::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) +{ + permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL); + copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::FullyConnected &node) +{ + copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT); + copyInputInitialize(node, ir::operation::FullyConnected::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::LSTM &node) +{ + copyInputInitialize(node, 
ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS); + copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS); + copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::RNN &node) +{ + copyInputInitialize(node, ir::operation::RNN::WEIGHTS); + copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS); + copyInputInitialize(node, ir::operation::RNN::BIAS); +} + +void AclConstantInitializer::visit(const ir::operation::TransposeConv &node) +{ + permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL); +} + +} // namespace acl_common +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h new file mode 100644 index 000000000..52f4c54cf --- /dev/null +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ +#define __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ + +#include <backend/IConstantInitializer.h> +#include <ir/Operands.h> +#include "AclTensorRegistry.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +class AclConstantInitializer : public IConstantInitializer +{ +public: + AclConstantInitializer(const ir::Operands &operands, + const std::shared_ptr<ITensorRegistry> &tensor_reg); + +public: + void visit(const ir::operation::BatchToSpaceND &) override; + void visit(const ir::operation::Conv2D &) override; + void visit(const ir::operation::DepthwiseConv2D &) override; + void visit(const ir::operation::FullyConnected &) override; + void visit(const ir::operation::LSTM &) override; + void visit(const ir::operation::RNN &) override; + void visit(const ir::operation::TransposeConv &) override; + +protected: + void copyInputInitialize(const ir::Operation &node, uint32_t index); + void permuteInputInitialize(const ir::Operation &node, uint32_t index); + +private: + std::shared_ptr<ITensorRegistry> tensor_registry() const final { return _tensor_reg; } + +protected: + std::shared_ptr<ITensorRegistry> _tensor_reg; +}; + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_COMPILER_ACL_COMMON_ACLCONSTANT_INITIALIZER_H__ diff --git a/runtime/onert/backend/acl_common/AclFunction.h b/runtime/onert/backend/acl_common/AclFunction.h index 85b18e847..94b65863a 100644 --- a/runtime/onert/backend/acl_common/AclFunction.h +++ b/runtime/onert/backend/acl_common/AclFunction.h @@ -47,12 +47,6 @@ private: std::unique_ptr<::arm_compute::IFunction> _func; }; -class AclClFunction : public AclFunction -{ -public: - using AclFunction::AclFunction; -}; - } // namespace acl_common } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/acl_common/AclKernelGen.h b/runtime/onert/backend/acl_common/AclKernelGen.h index 9f7ce3764..372ce689e 100644 --- a/runtime/onert/backend/acl_common/AclKernelGen.h +++ b/runtime/onert/backend/acl_common/AclKernelGen.h @@ -30,11 +30,32 @@ namespace backend namespace acl_common { +template <typename Layer, typename... Args> +std::unique_ptr<arm_compute::IFunction> generateLayer(Args &&... args) +{ + auto l = std::make_unique<Layer>(); + + l->configure(std::forward<Args>(args)...); + + return l; +} + +template <typename Layer, typename... Args> +std::unique_ptr<arm_compute::IFunction> +generateLayer(std::shared_ptr<arm_compute::IMemoryManager> memory_manager, Args &&... args) +{ + auto l = std::make_unique<Layer>(memory_manager); + + l->configure(std::forward<Args>(args)...); + + return l; +} + template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer, - typename T_TensorBuilder> -std::unique_ptr<exec::IFunction> -kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder) + typename T_TensorRegistry> +std::unique_ptr<exec::IFunction> kernelGenLSTM(const ir::operation::LSTM &node, + const ir::Operands &operands, + const std::shared_ptr<T_TensorRegistry> &tensor_reg) { // TODO Support dynamic rnn // TODO Fix subtle error in the case of non-CIFG, non-peephole and No Projection. 
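As a minimal sketch of the generateLayer<> helper introduced above: it default-constructs a layer, forwards every argument to configure(), and returns the layer as an owning function pointer, which is what lets the visitors in KernelGenerator.cc collapse their make_unique/configure pairs into one call. The stand-in types below (IFunction, DummyCopyLayer) are assumptions made so the example is self-contained; the real helper works on arm_compute::IFunction and the ACL layer classes.

#include <iostream>
#include <memory>
#include <utility>

// Stand-in for arm_compute::IFunction (assumption for this sketch).
struct IFunction
{
  virtual ~IFunction() = default;
  virtual void run() = 0;
};

// Stand-in for an ACL layer exposing the usual configure() entry point.
struct DummyCopyLayer : public IFunction
{
  void configure(int src, int dst) { _src = src; _dst = dst; }
  void run() override { std::cout << "copy " << _src << " -> " << _dst << std::endl; }
  int _src = 0;
  int _dst = 0;
};

// Same shape as the helper added to AclKernelGen.h: construct the layer,
// configure it with perfectly-forwarded arguments, return an owning pointer.
template <typename Layer, typename... Args>
std::unique_ptr<IFunction> generateLayer(Args &&... args)
{
  auto l = std::make_unique<Layer>();
  l->configure(std::forward<Args>(args)...);
  return l;
}

int main()
{
  // One call replaces the old make_unique + configure boilerplate.
  auto fn = generateLayer<DummyCopyLayer>(1, 2);
  fn->run();
  return 0;
}

The second overload in the patch has the same shape but passes a shared arm_compute::IMemoryManager to the layer constructor first, for layers such as CLRNNLayer and CLTransposeConvLayer that need an internal buffer manager.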
@@ -117,43 +138,44 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, const auto projection_clip = projection_threshold; assert(cell_clip >= 0.f && projection_clip >= 0.f); - auto scratch_buffer_tensor = tensor_builder->at(scratch_buffer_index).get(); - auto output_state_out_tensor = tensor_builder->at(output_state_out_index).get(); - auto cell_state_out_tensor = tensor_builder->at(cell_state_out_index).get(); - auto output_tensor = tensor_builder->at(output_index).get(); + auto scratch_buffer_tensor = tensor_reg->getAclTensor(scratch_buffer_index).get(); + auto output_state_out_tensor = tensor_reg->getAclTensor(output_state_out_index).get(); + auto cell_state_out_tensor = tensor_reg->getAclTensor(cell_state_out_index).get(); + auto output_tensor = tensor_reg->getAclTensor(output_index).get(); - auto input_tensor = tensor_builder->at(input_index).get(); + auto input_tensor = tensor_reg->getAclTensor(input_index).get(); - auto input_to_forget_weights_tensor = tensor_builder->at(input_to_forget_weights_index).get(); - auto input_to_cell_weights_tensor = tensor_builder->at(input_to_cell_weights_index).get(); - auto input_to_output_weights_tensor = tensor_builder->at(input_to_output_weights_index).get(); + auto input_to_forget_weights_tensor = + tensor_reg->getAclTensor(input_to_forget_weights_index).get(); + auto input_to_cell_weights_tensor = tensor_reg->getAclTensor(input_to_cell_weights_index).get(); + auto input_to_output_weights_tensor = + tensor_reg->getAclTensor(input_to_output_weights_index).get(); auto recurrent_to_forget_weights_tensor = - tensor_builder->at(recurrent_to_forget_weights_index).get(); - auto recurrent_to_cell_weights_tensor = tensor_builder->at(recurrent_to_cell_weights_index).get(); + tensor_reg->getAclTensor(recurrent_to_forget_weights_index).get(); + auto recurrent_to_cell_weights_tensor = + tensor_reg->getAclTensor(recurrent_to_cell_weights_index).get(); auto recurrent_to_output_weights_tensor = - tensor_builder->at(recurrent_to_output_weights_index).get(); + tensor_reg->getAclTensor(recurrent_to_output_weights_index).get(); - auto forget_gate_bias_tensor = tensor_builder->at(forget_gate_bias_index).get(); - auto cell_bias_tensor = tensor_builder->at(cell_bias_index).get(); - auto output_gate_bias_tensor = tensor_builder->at(output_gate_bias_index).get(); - auto output_state_in_tensor = tensor_builder->at(output_state_in_index).get(); - auto cell_state_in_tensor = tensor_builder->at(cell_state_in_index).get(); + auto forget_gate_bias_tensor = tensor_reg->getAclTensor(forget_gate_bias_index).get(); + auto cell_bias_tensor = tensor_reg->getAclTensor(cell_bias_index).get(); + auto output_gate_bias_tensor = tensor_reg->getAclTensor(output_gate_bias_index).get(); + auto output_state_in_tensor = tensor_reg->getAclTensor(output_state_in_index).get(); + auto cell_state_in_tensor = tensor_reg->getAclTensor(cell_state_in_index).get(); - auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation); - - auto fn = std::make_unique<T_ACLLayer>(); + auto act_info = asActivationLayerInfo(activation); ::arm_compute::LSTMParams<T_Tensor> lstm_params{}; if (has_cifg_param) { auto input_to_input_weights_tensor = - tensor_builder->at(input_to_input_weights_index).get(); // optional + tensor_reg->getAclTensor(input_to_input_weights_index).get(); // optional auto recurrent_to_input_weights_tensor = - tensor_builder->at(recurrent_to_input_weights_index).get(); // optional + tensor_reg->getAclTensor(recurrent_to_input_weights_index).get(); 
// optional auto cell_to_input_weights_handle = - has_peephole_param ? tensor_builder->at(cell_to_input_weights_index).get()->handle() + has_peephole_param ? tensor_reg->getAclTensor(cell_to_input_weights_index).get()->handle() : nullptr; // optional (non-cifg && peephole) - auto input_gate_bias_tensor = tensor_builder->at(input_gate_bias_index).get(); // optional + auto input_gate_bias_tensor = tensor_reg->getAclTensor(input_gate_bias_index).get(); // optional lstm_params.set_cifg_params(input_to_input_weights_tensor->handle(), recurrent_to_input_weights_tensor->handle(), cell_to_input_weights_handle, input_gate_bias_tensor->handle()); @@ -161,40 +183,42 @@ kernelGenLSTM(const ir::operation::LSTM &node, const ir::Operands &operands, if (has_peephole_param) { auto cell_to_forget_weights_tensor = - tensor_builder->at(cell_to_forget_weights_index).get(); // optional + tensor_reg->getAclTensor(cell_to_forget_weights_index).get(); // optional auto cell_to_output_weights_tensor = - tensor_builder->at(cell_to_output_weights_index).get(); // optional + tensor_reg->getAclTensor(cell_to_output_weights_index).get(); // optional lstm_params.set_peephole_params(cell_to_forget_weights_tensor->handle(), cell_to_output_weights_tensor->handle()); } if (has_projection_param) { - auto projection_weights_tensor = tensor_builder->at(projection_weights_index).get(); // optional - auto projection_bias_handle = has_projection_bias - ? tensor_builder->at(projection_bias_index).get()->handle() - : nullptr; // optional + auto projection_weights_tensor = + tensor_reg->getAclTensor(projection_weights_index).get(); // optional + auto projection_bias_handle = + has_projection_bias ? tensor_reg->getAclTensor(projection_bias_index).get()->handle() + : nullptr; // optional lstm_params.set_projection_params(projection_weights_tensor->handle(), projection_bias_handle); } - fn->configure(input_tensor->handle(), input_to_forget_weights_tensor->handle(), - input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), - recurrent_to_forget_weights_tensor->handle(), - recurrent_to_cell_weights_tensor->handle(), - recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), - cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), - output_state_in_tensor->handle(), cell_state_in_tensor->handle(), - scratch_buffer_tensor->handle(), output_state_out_tensor->handle(), - cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, - cell_clip, projection_clip); + auto fn = generateLayer<T_ACLLayer>( + input_tensor->handle(), input_to_forget_weights_tensor->handle(), + input_to_cell_weights_tensor->handle(), input_to_output_weights_tensor->handle(), + recurrent_to_forget_weights_tensor->handle(), recurrent_to_cell_weights_tensor->handle(), + recurrent_to_output_weights_tensor->handle(), forget_gate_bias_tensor->handle(), + cell_bias_tensor->handle(), output_gate_bias_tensor->handle(), + output_state_in_tensor->handle(), cell_state_in_tensor->handle(), + scratch_buffer_tensor->handle(), output_state_out_tensor->handle(), + cell_state_out_tensor->handle(), output_tensor->handle(), lstm_params, act_info, cell_clip, + projection_clip); return std::make_unique<T_FunctionWrapper>(std::move(fn)); } template <typename T_FunctionWrapper, typename T_Tensor, typename T_ACLLayer, - typename T_TensorBuilder> + typename T_TensorBuilder, typename T_TensorRegistry> std::unique_ptr<exec::IFunction> kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Operands 
&operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout) + const std::shared_ptr<T_TensorBuilder> &tensor_builder, + const std::shared_ptr<T_TensorRegistry> &tensor_reg, ir::Layout layout) { using ir::operation::FullyConnected; @@ -236,16 +260,13 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope reshape.dim(1) = input_size; /* W */ } - auto output_tensor = tensor_builder->at(output_index).get(); - const auto input_tensor = tensor_builder->at(input_index).get(); - const auto weight_tensor = tensor_builder->at(weight_index).get(); - const auto bias_tensor = tensor_builder->at(bias_index).get(); + auto output_tensor = tensor_reg->getAclTensor(output_index).get(); + const auto input_tensor = tensor_reg->getAclTensor(input_index).get(); + const auto weight_tensor = tensor_reg->getAclTensor(weight_index).get(); + const auto bias_tensor = tensor_reg->getAclTensor(bias_index).get(); const auto frontend_layout = layout; const auto acl_layout = output_tensor->handle()->info()->data_layout(); - auto fn = - std::make_unique<T_ACLLayer>(tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - typename T_ACLLayer::KernelType kernel_type = T_ACLLayer::KernelType::GENERAL; if (operands.at(weight_index).isConstant()) { @@ -253,20 +274,18 @@ kernelGenFullyConnected(const ir::operation::FullyConnected &node, const ir::Ope assert(operands.at(weight_index).data()); } - fn->configure( - input_tensor->handle(), weight_tensor->handle(), bias_tensor->handle(), - output_tensor->handle(), needs_reshape, - ::onert::backend::acl_common::asTensorShape( - reshape, frontend_layout, ::onert::backend::acl_common::asRuntimeLayout(acl_layout)), - kernel_type); + auto fn = generateLayer<T_ACLLayer>( + tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(), + weight_tensor->handle(), bias_tensor->handle(), output_tensor->handle(), needs_reshape, + asTensorShape(reshape, frontend_layout, asRuntimeLayout(acl_layout)), kernel_type); return std::make_unique<T_FunctionWrapper>(std::move(fn)); } -template <typename T_ACLLayer, typename T_PoolOp, typename T_TensorBuilder> +template <typename T_ACLLayer, typename T_PoolOp, typename T_AclTensorRegistry> std::unique_ptr<::arm_compute::IFunction> kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, - const std::shared_ptr<T_TensorBuilder> &tensor_builder, ir::Layout layout, + const std::shared_ptr<T_AclTensorRegistry> &tensor_reg, ir::Layout layout, ::arm_compute::PoolingType pooling_type) { const auto ofm_index{node.getOutputs().at(0)}; @@ -294,16 +313,14 @@ kernelGenPool2D(const T_PoolOp &node, const ir::Operands &operands, VERBOSE(Pool2DParam) << "PAD(L): " << padding.left << std::endl; VERBOSE(Pool2DParam) << "PAD(R): " << padding.right << std::endl; - auto ofm_tensor = tensor_builder->at(ofm_index).get(); - auto ifm_tensor = tensor_builder->at(ifm_index).get(); + auto ofm_tensor = tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = tensor_reg->getAclTensor(ifm_index).get(); ::arm_compute::PoolingLayerInfo info{ pooling_type, ::arm_compute::Size2D{kw, kh}, ifm_tensor->info()->data_layout(), - acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */}; - - auto fn = std::make_unique<T_ACLLayer>(); + asPadStrideInfo(padding, stride), true /* exclude_padding */}; - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), info); + auto fn = generateLayer<T_ACLLayer>(ifm_tensor->handle(), ofm_tensor->handle(), info); return fn; } diff --git 
a/runtime/onert/backend/acl_common/AclTensorBuilder.h b/runtime/onert/backend/acl_common/AclTensorBuilder.h index 6b03fdf7f..91452014b 100644 --- a/runtime/onert/backend/acl_common/AclTensorBuilder.h +++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h @@ -25,6 +25,7 @@ #include "ir/OperandIndexMap.h" #include <ir/Operands.h> #include "AclTensorManager.h" +#include "AclTensorRegistry.h" #include <memory> #include "ParentInfo.h" #include <util/Utils.h> @@ -48,7 +49,8 @@ class AclTensorBuilder : public ITensorBuilder public: using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>; - AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr); + AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr, + const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg); /** * @brief Register tensor information to allocate on ACL-CL backend @@ -63,19 +65,13 @@ public: void notifyLastUse(const ir::OperandIndex &) override; bool isRegistered(const ir::OperandIndex &) const override; - std::shared_ptr<backend::ITensorRegistry> tensorRegistry() override { return nullptr; } void prepare(void) override; void allocate() override; void postFunctionPrepare() override; - std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override; - void iterate(const IterateFunction &fn) override; - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; - std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind); - T_AclTensorManager *acl_tensor_manager(void) { return _tensor_mgr.get(); } void setUsesCount(const ir::OperandIndex &index, size_t num_uses) @@ -100,8 +96,6 @@ public: */ bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child); - bool supportDynamicTensor() override { return false; } - private: void buildTensors(void); ir::OperandIndex findRootParent(ir::OperandIndex index); @@ -113,6 +107,7 @@ private: ir::OperandIndexMap<size_t> _uses_count_map; std::unique_ptr<T_AclTensorManager> _tensor_mgr; + std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> _tensor_reg; // for linear executor std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq; @@ -140,9 +135,10 @@ namespace acl_common { template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder(const ir::Operands &operands, - T_AclTensorManager *tensor_mgr) - : _operands{operands}, _tensor_mgr{tensor_mgr} +AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::AclTensorBuilder( + const ir::Operands &operands, T_AclTensorManager *tensor_mgr, + const std::shared_ptr<AclTensorRegistry<T_AclTensorManager>> &tensor_reg) + : _operands{operands}, _tensor_mgr{tensor_mgr}, _tensor_reg{tensor_reg} { assert(_tensor_mgr); } @@ -310,28 +306,6 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::postFunctionPrepare(voi } template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -std::shared_ptr<ITensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::tensorAt(const ir::OperandIndex &ind) -{ - return _tensor_mgr->at(ind); -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::iterate(const IterateFunction &fn) -{ - _tensor_mgr->iterate(fn); -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> -std::shared_ptr<T_ITensor> -AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::at(const ir::OperandIndex &ind) -{ - auto ret = 
_tensor_mgr->at(ind); - assert(ret != nullptr); - return ret; -} - -template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> std::unique_ptr<ITensorManager> AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::releaseStaticTensorManager(void) { diff --git a/runtime/onert/backend/acl_common/AclTensorRegistry.h b/runtime/onert/backend/acl_common/AclTensorRegistry.h new file mode 100644 index 000000000..1ef9f4b35 --- /dev/null +++ b/runtime/onert/backend/acl_common/AclTensorRegistry.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ +#define __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ + +#include "backend/ITensorRegistry.h" + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +/** + * @brief Tensor registry class for acl backends + * + * This is implemented as a wrapper of AclTensorManager. + */ +template <typename T_AclTensorManager> class AclTensorRegistry : public ITensorRegistry +{ +public: + AclTensorRegistry(T_AclTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {} + + std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override + { + return _tensor_mgr->at(ind); + } + + std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override + { + return getITensor(ind); + } + + auto getAclTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind); } + +private: + T_AclTensorManager *_tensor_mgr; +}; + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_COMMON_ACL_TENSOR_REGISTRY_H__ diff --git a/runtime/onert/backend/acl_common/Convert.cc b/runtime/onert/backend/acl_common/Convert.cc index a5bbe1691..67dcc8192 100644 --- a/runtime/onert/backend/acl_common/Convert.cc +++ b/runtime/onert/backend/acl_common/Convert.cc @@ -18,6 +18,7 @@ #include "Swizzle.h" #include "ir/DataType.h" +#include "ir/operation/ElementwiseActivation.h" #include <memory> namespace @@ -177,6 +178,50 @@ namespace acl_common } } +::arm_compute::ActivationLayerInfo +asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha, + float beta) +{ + switch (op_type) + { + case ir::operation::ElementwiseActivation::Type::RELU: + if (beta == 0.f) + { + if (alpha == ir::operation::ElementwiseActivation::infinity) + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + } + else + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, alpha}; + } + } + else + { + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, alpha, beta}; + } + case ir::operation::ElementwiseActivation::Type::TANH: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, alpha, beta}; + case 
ir::operation::ElementwiseActivation::Type::LOGISTIC: + // NOTE The sigmoid function is a special case of the Logistic function when L=1, k=1, x0=0. + // TODO In ACL and the nnapi spec, currently, Logistic's L is always 1, k is always 1, and x0 is + // always 0 (always sigmoid), regardless of the parameter values. + // If ACL supports a non-sigmoid logistic, these param values should be fixed. + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; + case ir::operation::ElementwiseActivation::Type::LEAKY_RELU: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LEAKY_RELU, alpha}; + default: + throw std::runtime_error{"Not supported, yet"}; + break; + } +} + arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank, ir::Layout frontend_layout, ir::Layout backend_layout) { @@ -223,11 +268,6 @@ std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunct return std::make_unique<AclFunction>(std::move(layer)); } -std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer) -{ - return std::make_unique<AclClFunction>(std::move(layer)); -} - ir::Layout asRuntimeLayout(::arm_compute::DataLayout data_layout) { switch (data_layout) @@ -265,6 +305,21 @@ ir::DataType asRuntimeDataType(::arm_compute::DataType data_type) } } +arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir) +{ + switch (pool_type_ir) + { + case ir::operation::Pool2D::PoolType::AVG: + return arm_compute::PoolingType::AVG; + case ir::operation::Pool2D::PoolType::L2: + return arm_compute::PoolingType::L2; + case ir::operation::Pool2D::PoolType::MAX: + return arm_compute::PoolingType::MAX; + default: + throw std::runtime_error("convertPoolType: Not supported operation yet"); + } +} + arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir) { switch (reduce_type_ir) diff --git a/runtime/onert/backend/acl_common/Convert.h b/runtime/onert/backend/acl_common/Convert.h index 9362098a5..380321c07 100644 --- a/runtime/onert/backend/acl_common/Convert.h +++ b/runtime/onert/backend/acl_common/Convert.h @@ -25,7 +25,9 @@ #include "ir/Layout.h" #include "ir/InternalType.h" #include "ir/Operand.h" +#include "ir/operation/Pool2D.h" #include "ir/operation/Reduce.h" +#include "ir/operation/ElementwiseActivation.h" #include "ir/Shape.h" #include "ir/TypeInfo.h" #include "ir/Coordinates.h" @@ -59,6 +61,9 @@ namespace acl_common const ir::Stride &stride); ::arm_compute::ActivationLayerInfo asActivationLayerInfo(ir::Activation act_code); +::arm_compute::ActivationLayerInfo +asActivationLayerInfo(const ir::operation::ElementwiseActivation::Type op_type, float alpha, + float beta); arm_compute::Coordinates asCoordinates(const ir::Operand &operand, int32_t rank, ir::Layout frontend_layout, ir::Layout backend_layout); @@ -67,7 +72,6 @@ std::set<uint32_t> asSet(const ir::Operand &operand, int32_t rank, ir::Layout fr ir::Layout backend_layout); std::unique_ptr<AclFunction> asAclFunction(std::unique_ptr<::arm_compute::IFunction> &&layer); -std::unique_ptr<AclClFunction> asAclClFunction(std::unique_ptr<::arm_compute::IFunction> &&layer); template <typename T_Function> std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction> &&fn) @@ -78,6 +82,7 @@ std::unique_ptr<T_Function> asFunction(std::unique_ptr<::arm_compute::IFunction>
ir::DataType asRuntimeDataType(::arm_compute::DataType data_type); +arm_compute::PoolingType convertPoolType(ir::operation::Pool2D::PoolType pool_type_ir); arm_compute::ReduceOperation convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir); } // namespace acl_common diff --git a/runtime/onert/backend/acl_neon/Backend.h b/runtime/onert/backend/acl_neon/Backend.h index a0b145e19..35d6e4e8e 100644 --- a/runtime/onert/backend/acl_neon/Backend.h +++ b/runtime/onert/backend/acl_neon/Backend.h @@ -48,10 +48,13 @@ public: const auto &operands = graph.operands(); const auto &operations = graph.operations(); auto context = std::make_unique<BackendContext>(this, &graph); - auto tb = std::make_shared<TensorBuilder>(operands, createTensorManager(is_linear_executor)); + auto tm = createTensorManager(is_linear_executor); + auto tr = std::make_shared<acl_common::AclTensorRegistry<TensorManager>>(tm); + auto tb = std::make_shared<TensorBuilder>(operands, tm, tr); + context->tensor_registry = tr; context->tensor_builder = tb; - context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb); - context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb); + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr); context->tensor_register = nullptr; context->optimizer = std::make_shared<Optimizer>(context.get()); return context; diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc index 4191b277f..79edb9ded 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.cc @@ -24,100 +24,12 @@ namespace acl_neon { ConstantInitializer::ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<TensorBuilder> &tensor_builder) - : IConstantInitializer{operands}, _tensor_builder{tensor_builder} + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : acl_common::AclConstantInitializer{operands, tensor_reg} { // DO NOTHING } -void ConstantInitializer::copyInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerCopyInitializer(input_index, input_obj); -} - -void ConstantInitializer::permuteInputInitialize(const ir::Operation &node, uint32_t index) -{ - assert(node.getInputs().size() > index); - - const auto &input_index = node.getInputs().at(index); - const auto &input_obj = _operands.at(input_index); - registerPermuteInitializer(input_index, input_obj); -} - -void ConstantInitializer::visit(const ir::operation::BatchToSpaceND &node) -{ - const auto &block_size_index = node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE); - const auto &block_size_obj = _operands.at(block_size_index); - - if (block_size_obj.isConstant()) - { - _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { - assert(model_obj.data()); - const auto &shape = model_obj.shape(); - const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); - assert(model_obj.shape().rank() == 1); - obj.access([&](ITensor &tensor) { - for (size_t i = 0; i < shape.num_elements(); ++i) - { - const int32_t value = base[shape.num_elements() - i - 1]; - int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + - 
tensor.calcOffset({static_cast<int32_t>(i)})); - *into = value; - } - }); - }; - } -} - -void ConstantInitializer::visit(const ir::operation::Conv2D &node) -{ - permuteInputInitialize(node, ir::operation::Conv2D::KERNEL); - copyInputInitialize(node, ir::operation::Conv2D::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node) -{ - permuteInputInitialize(node, ir::operation::DepthwiseConv2D::KERNEL); - copyInputInitialize(node, ir::operation::DepthwiseConv2D::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::FullyConnected &node) -{ - copyInputInitialize(node, ir::operation::FullyConnected::WEIGHT); - copyInputInitialize(node, ir::operation::FullyConnected::BIAS); -} - -void ConstantInitializer::visit(const ir::operation::LSTM &node) -{ - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_CELL_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::RECURRENT_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_INPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_FORGET_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::CELL_TO_OUTPUT_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::INPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::FORGET_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::OUTPUT_GATE_BIAS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_WEIGHTS); - copyInputInitialize(node, ir::operation::LSTM::PROJECTION_BIAS); -} - -void ConstantInitializer::visit(const ir::operation::RNN &node) -{ - copyInputInitialize(node, ir::operation::RNN::WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::RECURRENT_WEIGHTS); - copyInputInitialize(node, ir::operation::RNN::BIAS); -} - void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) { const auto &block_size_index = node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE); @@ -173,11 +85,6 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) } } -void ConstantInitializer::visit(const ir::operation::TransposeConv &node) -{ - permuteInputInitialize(node, ir::operation::TransposeConv::KERNEL); -} - } // namespace acl_neon } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.h b/runtime/onert/backend/acl_neon/ConstantInitializer.h index 6b4c1f145..c7d71cdcf 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.h +++ b/runtime/onert/backend/acl_neon/ConstantInitializer.h @@ -17,9 +17,7 @@ #ifndef __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_ACL_NEON_CONSTANT_INITIALIZER_H__ -#include <backend/IConstantInitializer.h> -#include <ir/Operands.h> -#include "TensorBuilder.h" +#include "AclConstantInitializer.h" namespace onert { @@ -28,29 +26,15 @@ namespace backend namespace acl_neon { -class ConstantInitializer : public IConstantInitializer +class ConstantInitializer : public acl_common::AclConstantInitializer { public: ConstantInitializer(const ir::Operands &operands, - const 
std::shared_ptr<TensorBuilder> &tensor_builder); + const std::shared_ptr<ITensorRegistry> &tensor_reg); public: - void visit(const ir::operation::BatchToSpaceND &) override; - void visit(const ir::operation::Conv2D &) override; - void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::FullyConnected &) override; - void visit(const ir::operation::LSTM &) override; - void visit(const ir::operation::RNN &) override; - void visit(const ir::operation::SpaceToBatchND &) override; - void visit(const ir::operation::TransposeConv &) override; - -private: - std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } - void copyInputInitialize(const ir::Operation &node, uint32_t index); - void permuteInputInitialize(const ir::Operation &node, uint32_t index); - -private: - std::shared_ptr<TensorBuilder> _tensor_builder; + using acl_common::AclConstantInitializer::visit; + void visit(const ir::operation::SpaceToBatchND &node) final; }; } // namespace acl_neon diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.cc b/runtime/onert/backend/acl_neon/KernelGenerator.cc index 1195b83cc..6d53c1245 100644 --- a/runtime/onert/backend/acl_neon/KernelGenerator.cc +++ b/runtime/onert/backend/acl_neon/KernelGenerator.cc @@ -44,11 +44,12 @@ using ::onert::backend::acl_common::asAclFunction; using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder< ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>; -KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx, - const ir::Operations &operations_ctx, - const std::shared_ptr<TensorBuilder> &tensor_builder) +KernelGenerator::KernelGenerator( + const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &tensor_reg) : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder), - _current_op_seq_layout(ir::Layout::UNKNOWN) + _tensor_reg(tensor_reg), _current_op_seq_layout(ir::Layout::UNKNOWN) { // DO NOTHING } @@ -70,26 +71,6 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) } } -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - - auto fn = std::make_unique<::arm_compute::NEActivationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - void KernelGenerator::visit(const ir::operation::ArgMax &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -97,8 +78,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto ifm_rank = _ctx.at(ifm_index).shape().rank(); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); auto frontend_layout = _current_op_seq_layout; auto backend_layout = ifm_tensor->layout(); @@ -111,14 +92,11 @@ void 
KernelGenerator::visit(const ir::operation::ArgMax &node) const auto fixed_axis = acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>(); - - fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), - arm_compute::ReductionOperation::ARG_IDX_MAX); + auto fn = acl_common::generateLayer<arm_compute::NEArgMinMaxLayer>( + ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(), + arm_compute::ReductionOperation::ARG_IDX_MAX); - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) @@ -128,50 +106,67 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto block_size_index{ node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto block_size_tensor = _tensor_builder->at(block_size_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get(); assert(_ctx.at(block_size_index).data()); - auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>(); - - fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); + auto fn = acl_common::generateLayer<arm_compute::NEBatchToSpaceLayer>( + ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle()); - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Cast &node) +void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; + const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - std::unique_ptr<::arm_compute::IFunction> fn; - if (ifm_tensor->data_type() == ofm_tensor->data_type()) - { - auto l = std::make_unique<::arm_compute::NECopy>(); + const auto activation = node.param().activation; - l->configure(ifm_tensor->handle(), ofm_tensor->handle()); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - fn = std::move(l); - } - else + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().arithmetic_type) { - auto l = std::make_unique<::arm_compute::NECast>(); - - l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); - - fn = std::move(l); + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + { + fn = acl_common::generateLayer<arm_compute::NEArithmeticAddition>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + { + fn = acl_common::generateLayer<arm_compute::NEArithmeticSubtraction>( + lhs_tensor->handle(), 
rhs_tensor->handle(), ofm_tensor->handle(), + arm_compute::ConvertPolicy::SATURATE); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + { + // For scale 1.0, the only allowed RoundingPolicy is RoundingPolicy::TO_ZERO + fn = acl_common::generateLayer<arm_compute::NEPixelWiseMultiplication>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::DIV: + { + fn = acl_common::generateLayer<arm_compute::NEElementwiseDivision>( + lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle()); + break; + } + default: + assert(false && "The BinaryArithmetic operation supports only binary arithmetic operations"); + break; } - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = std::make_unique<exec::FunctionSequence>( + asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); } void KernelGenerator::visit(const ir::operation::Conv2D &node) @@ -195,20 +190,18 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) ker_width, ker_height); const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); - auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>( - _tensor_builder->acl_tensor_manager()->internal_buffer_manager()); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(), - ::arm_compute::Size2D(1U, 1U), act_info); + auto fn = acl_common::generateLayer<arm_compute::NEConvolutionLayer>( + _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), ifm_tensor->handle(), + ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), conv_info, + ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info); _return_fn = asAclFunction(std::move(fn)); } @@ -221,16 +214,13 @@ void KernelGenerator::visit(const ir::operation::DepthToSpace &node) auto block_size = node.param().block_size; assert(block_size > 0); - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); - auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>(); + auto fn = acl_common::generateLayer<arm_compute::NEDepthToSpaceLayer>( + input_tensor->handle(), output_tensor->handle(), block_size); - fn->configure(input_tensor->handle(), output_tensor->handle(), block_size); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) @@ -255,67 +245,23 @@ void 
KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const auto multiplier = node.param().multiplier; const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto ker_tensor = _tensor_builder->at(ker_index).get(); - auto bias_tensor = _tensor_builder->at(bias_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get(); const auto conv_info = acl_common::asPadStrideInfo(padding, stride); const auto act_info = acl_common::asActivationLayerInfo(activation); { - auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); - - fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), - ofm_tensor->handle(), conv_info, multiplier, act_info); + auto fn = acl_common::generateLayer<arm_compute::NEDepthwiseConvolutionLayer>( + ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(), ofm_tensor->handle(), + conv_info, multiplier, act_info); _return_fn = asAclFunction(std::move(fn)); } } -void KernelGenerator::visit(const ir::operation::Dequantize &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::MaxPool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::AvgPool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - void KernelGenerator::visit(const ir::operation::Concat &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -336,80 +282,223 @@ void KernelGenerator::visit(const ir::operation::Concat &node) return; } - auto output_tensor = _tensor_builder->at(ofm_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(ofm_index).get(); std::vector<::arm_compute::ITensor *> input_tensors; for (const auto &ifm_ind : input_indexes) - input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle()); + input_tensors.emplace_back(_tensor_reg->getAclTensor(ifm_ind)->handle()); 
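// NOTE The acl_common::generateLayer<T> helper used throughout this change is defined
// outside this diff. Judging only from the call sites here (an assumption, not the
// actual implementation), it is a small forwarding factory that folds the former
// make_unique + configure pairs into one call. A minimal sketch under that assumption:

#include <memory>  // std::make_unique
#include <utility> // std::forward

template <typename T_Layer, typename... Ts>
std::unique_ptr<T_Layer> generateLayer(Ts &&... args)
{
  // Construct the ACL layer, configure it with the forwarded arguments,
  // and hand ownership back to the caller.
  auto layer = std::make_unique<T_Layer>();
  layer->configure(std::forward<Ts>(args)...);
  return layer;
}

// Call sites such as NEConvolutionLayer above also pass a memory manager first,
// so an overload that forwards an extra constructor argument is presumably provided.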
std::unique_ptr<::arm_compute::IFunction> fn; if (input_indexes.size() < 2) { - auto l = std::make_unique<::arm_compute::NECopy>(); - l->configure(input_tensors.at(0), output_tensor->handle()); - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensors.at(0), + output_tensor->handle()); } else { - auto l = std::make_unique<::arm_compute::NEConcatenateLayer>(); const auto rank = _ctx.at(ofm_index).shape().rank(); const auto frontend_layout = _current_op_seq_layout; const auto backend_layout = output_tensor->layout(); const auto fixed_axis = acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value(); - l->configure(input_tensors, output_tensor->handle(), fixed_axis); - fn = std::move(l); + fn = acl_common::generateLayer<arm_compute::NEConcatenateLayer>( + input_tensors, output_tensor->handle(), fixed_axis); } - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) +void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node) { - const auto output_index{node.getOutputs().at(0)}; - const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; - const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)}; + + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + + const ::arm_compute::ActivationLayerInfo act_info = acl_common::asActivationLayerInfo( + node.param().op_type, node.param().alpha, node.param().beta); - auto output_tensor = _tensor_builder->at(output_index).get(); - auto lookups_tensor = _tensor_builder->at(lookups_index).get(); - auto values_tensor = _tensor_builder->at(values_index).get(); + std::unique_ptr<arm_compute::IFunction> fn; + if (node.param().op_type == ir::operation::ElementwiseActivation::Type::LOGISTIC) + { + // NOTE NEActivationLayer can produce erroneous results. This is caused by + // 'vexpq_f32()'. + // The neon function returns a value outside of the representable range of float as 'NaN' + // instead of 'INF', and the result of this op then contains errors due to the 'NaN'.
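    // To make the NOTE above concrete (illustrative numbers, not from the source):
    // the logistic op computes sigmoid(x) = 1 / (1 + exp(-x)). For x = -100.0f,
    // exp(-x) = exp(100.0f) overflows float. A scalar expf() returns +INF, and
    // 1.0f / (1.0f + INF) still saturates to the correct 0.0f; but when the
    // vectorized vexpq_f32() path yields NaN instead of INF, 1.0f / (1.0f + NaN)
    // stays NaN and poisons the output. Hence NEActivationLayerEx is selected
    // for LOGISTIC below.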
+ fn = acl_common::generateLayer<arm_compute::NEActivationLayerEx>( + ifm_tensor->handle(), ofm_tensor->handle(), act_info); + } + else + { + fn = acl_common::generateLayer<arm_compute::NEActivationLayer>(ifm_tensor->handle(), + ofm_tensor->handle(), act_info); + } - auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>(); + _return_fn = asAclFunction(std::move(fn)); +} - fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); +void KernelGenerator::visit(const ir::operation::ElementwiseBinary &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)}; - auto acl_fn = asAclFunction(std::move(fn)); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get(); - _return_fn = std::move(acl_fn); + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().op_type) + { + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND: + { + fn = acl_common::generateLayer<arm_compute::NELogicalAnd>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR: + { + fn = acl_common::generateLayer<arm_compute::NELogicalOr>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX: + { + fn = acl_common::generateLayer<arm_compute::NEElementwiseMax>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN: + { + fn = acl_common::generateLayer<arm_compute::NEElementwiseMin>( + lhs_tensor->handle(), rhs_tensor->handle(), output_tensor->handle()); + break; + } + default: + { + std::string err_msg("acl_neon KernelGenerator : " + node.name() + + " is not an elementwise-binary operation"); + assert(false && err_msg.c_str()); + break; + } + } + _return_fn = asAclFunction(std::move(fn)); } -void KernelGenerator::visit(const ir::operation::Floor &node) +void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node) { - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)}; + const auto output_index{node.getOutputs().at(0)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; + + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto input_tensor = _tensor_reg->getAclTensor(input_index).get(); + + std::unique_ptr<arm_compute::IFunction> fn; + switch (node.param().op_type) + { + case ir::operation::ElementwiseUnary::Type::ABS: + { + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + fn = acl_common::generateLayer<arm_compute::NEActivationLayer>( + input_tensor->handle(), output_tensor->handle(), act_info); + break; + } + case ir::operation::ElementwiseUnary::Type::CAST: + { + if (input_tensor->data_type() == output_tensor->data_type()) + { + fn = acl_common::generateLayer<arm_compute::NECopy>(input_tensor->handle(), + 
output_tensor->handle()); + } + else + { + fn = acl_common::generateLayer<arm_compute::NECast>( + input_tensor->handle(), output_tensor->handle(), arm_compute::ConvertPolicy::SATURATE); + } + break; + } + case ir::operation::ElementwiseUnary::Type::DEQUANTIZE: + { + fn = acl_common::generateLayer<arm_compute::NEDequantizationLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::EXP: + { + fn = acl_common::generateLayer<arm_compute::NEExpLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::FLOOR: + { + fn = acl_common::generateLayer<arm_compute::NEFloor>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT: + { + fn = acl_common::generateLayer<arm_compute::NEBitwiseNot>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::NEG: + { + fn = acl_common::generateLayer<arm_compute::NENegLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::RSQRT: + { + fn = acl_common::generateLayer<arm_compute::NERsqrtLayer>(input_tensor->handle(), + output_tensor->handle()); + break; + } + case ir::operation::ElementwiseUnary::Type::SQRT: + { + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; - auto fn = std::make_unique<::arm_compute::NEFloor>(); + fn = acl_common::generateLayer<arm_compute::NEActivationLayer>( + input_tensor->handle(), output_tensor->handle(), act_info); + break; + } + default: + { + throw std::runtime_error("acl_neon KernelGenerator : " + node.name() + + " is not supported yet"); + break; + } + } + _return_fn = asAclFunction(std::move(fn)); +} - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); +void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node) +{ + const auto output_index{node.getOutputs().at(0)}; + const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)}; + const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)}; - auto acl_fn = asAclFunction(std::move(fn)); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(values_index).get(); - _return_fn = std::move(acl_fn); + auto fn = acl_common::generateLayer<arm_compute::NEEmbeddingLookup>( + values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle()); + + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::FullyConnected &node) { const auto output_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->at(output_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); const auto activation = node.param().activation; auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor, ::arm_compute::NEFullyConnectedReshapingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout); + node, _ctx, _tensor_builder, _tensor_reg, _current_op_seq_layout); _return_fn = std::make_unique<exec::FunctionSequence>( std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle())); } @@ -423,21 +512,18 @@ void KernelGenerator::visit(const ir::operation::HashtableLookup &node) const auto 
keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)}; const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)}; - auto output_tensor = _tensor_builder->at(output_index).get(); - auto hits_tensor = _tensor_builder->at(hits_index).get(); + auto output_tensor = _tensor_reg->getAclTensor(output_index).get(); + auto hits_tensor = _tensor_reg->getAclTensor(hits_index).get(); - auto lookups_tensor = _tensor_builder->at(lookups_index).get(); - auto keys_tensor = _tensor_builder->at(keys_index).get(); - auto values_tensor = _tensor_builder->at(values_index).get(); + auto lookups_tensor = _tensor_reg->getAclTensor(lookups_index).get(); + auto keys_tensor = _tensor_reg->getAclTensor(keys_index).get(); + auto values_tensor = _tensor_reg->getAclTensor(values_index).get(); - auto fn = std::make_unique<::arm_compute::NEHashtableLookup>(); + auto fn = acl_common::generateLayer<arm_compute::NEHashtableLookup>( + lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), + output_tensor->handle(), hits_tensor->handle()); - fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(), - output_tensor->handle(), hits_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::Gather &node) @@ -453,9 +539,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) // Converting in reverse order const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value(); - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto indices_tensor = _tensor_builder->at(indices_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto indices_tensor = _tensor_reg->getAclTensor(indices_index).get(); const auto backend_layout = ofm_tensor->layout(); UNUSED_RELEASE(backend_layout); @@ -471,8 +557,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node) assert(backend_layout == indices_tensor->layout()); assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout); - auto fn = std::make_unique<::arm_compute::NEGatherEx>(); - // input is n-D, indices k-D, output is (n + k - 1)-D size_t n = ifm_rank; assert(n == ifm_tensor->num_dimensions()); @@ -495,15 +579,14 @@ void KernelGenerator::visit(const ir::operation::Gather &node) acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false)); } - fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); + auto fn = acl_common::generateLayer<arm_compute::NEGatherEx>( + ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis); // acl_neon doesn't not revert disabling applied dim_correction because acl_neon's kernels would // use arm_compute::TensorInfo::offset_element_in_bytes() // It would create an error when the kernel accesses high dimension that its value is 1 - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::InstanceNorm &node) @@ -513,17 +596,16 @@ void KernelGenerator::visit(const ir::operation::InstanceNorm &node) const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)}; const auto 
beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - auto gamma_tensor = _tensor_builder->at(gamma_index).get(); - auto beta_tensor = _tensor_builder->at(beta_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); + auto gamma_tensor = _tensor_reg->getAclTensor(gamma_index).get(); + auto beta_tensor = _tensor_reg->getAclTensor(beta_index).get(); auto epsilon = node.param().epsilon; auto activation = node.param().activation; - auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), - beta_tensor->handle(), epsilon); + auto fn = acl_common::generateLayer<arm_compute::NEInstanceNormalizationLayerEx>( + ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(), beta_tensor->handle(), + epsilon); _return_fn = std::make_unique<exec::FunctionSequence>( asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); @@ -548,32 +630,16 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) float bias = 0.0f; // Don't offset the reduction. - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, radius, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = asAclFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::L2Pool2D &node) -{ - auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>( - node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2); - - const auto ofm_index{node.getOutputs().at(0)}; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - const auto activation = node.param().activation; - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(raw_fn)), - ActivationBuilder::generate(activation, ofm_tensor->handle())); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node) @@ -587,142 +653,22 @@ void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &nod auto beta = node.param().beta; auto bias = node.param().bias; - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get(); const auto norm_info = ::arm_compute::NormalizationLayerInfo( ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false); - auto fn = std::make_unique<::arm_compute::NENormalizationLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - - auto acl_fn = 
asAclFunction(std::move(fn)); + auto fn = acl_common::generateLayer<arm_compute::NENormalizationLayer>( + ifm_tensor->handle(), ofm_tensor->handle(), norm_info); - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalAnd &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::NELogicalAnd>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalNot &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input_tensor = _tensor_builder->at(input_index).get(); - - auto fn = std::make_unique<::arm_compute::NEBitwiseNot>(); - - fn->configure(input_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)}; - const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)}; - - auto output_tensor = _tensor_builder->at(output_index).get(); - auto input0_tensor = _tensor_builder->at(input0_index).get(); - auto input1_tensor = _tensor_builder->at(input1_index).get(); - - auto fn = std::make_unique<::arm_compute::NELogicalOr>(); - - fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); -} - -void KernelGenerator::visit(const ir::operation::Logistic &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC}; - - // NOTE NEActivationLayer can generate produce erroneous results. it were caused by 'vexpq_f32()'. - // The neon function returns a value outside of the limit of representation in float as 'NaN' - // instead of 'INF', and then the result of this op will be errors due to the 'NaN'. 
- auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + _return_fn = asAclFunction(std::move(fn)); } void KernelGenerator::visit(const ir::operation::LSTM &node) { _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor, - ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder); -} - -void KernelGenerator::visit(const ir::operation::Mul &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto lhs_tensor = _tensor_builder->at(lhs_index).get(); - auto rhs_tensor = _tensor_builder->at(rhs_index).get(); - - auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>(); - - // RoundingPolicy for scale:1.0 is only allowed RoundingPolicy::TO_ZERO - fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale - arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); - - _return_fn = std::make_unique<exec::FunctionSequence>( - asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle())); -} - -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->at(ofm_index).get(); - auto ifm_tensor = _tensor_builder->at(ifm_index).get(); - - auto fn = std::make_unique<::arm_compute::NENegLayer>(); - - fn->configure(ifm_tensor->handle(), ofm_tensor->handle()); - - auto acl_fn = asAclFunction(std::move(fn)); - - _return_fn = std::move(acl_fn); + ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_reg); } void KernelGenerator::visit(const ir::operation::Pack &node) @@ -736,25 +682,23 @@ void KernelGenerator::visit(const ir::operation::Pack &node) for (const auto &input_index : node.getInputs()) input_indexes.emplace_back(input_index); - auto output = _tensor_builder->at(output_index).get()->handle(); + auto output = _tensor_reg->getAclTensor(output_index).get()->handle(); std::vector<arm_compute::ITensor *> inputs; for (const auto &input_index : input_indexes) - inputs.emplace_back(_tensor_builder->at(input_index)->handle()); + inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle()); const auto frontend_layout = _current_op_seq_layout; - const auto backend_layout = _tensor_builder->at(output_index).get()->layout(); + const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout(); if (axis < 0) axis += output_rank; axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value(); - auto fn = std::make_unique<::arm_compute::NEStackLayer>(); - // Disable applied dim_correction for (const auto &input_index : input_indexes) { size_t input_rank = _ctx.at(input_index).shape().rank(); - const auto &input_tensor = _tensor_builder->at(input_index); + const auto &input_tensor = _tensor_reg->getAclTensor(input_index); assert(input_rank == input_tensor->num_dimensions()); if (input_rank != input_tensor->info()->num_dimensions()) { @@ -764,7 +708,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node) } } - 
void KernelGenerator::visit(const ir::operation::Pack &node)
@@ -736,25 +682,23 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
  for (const auto &input_index : node.getInputs())
    input_indexes.emplace_back(input_index);

-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();
  std::vector<arm_compute::ITensor *> inputs;
  for (const auto &input_index : input_indexes)
-    inputs.emplace_back(_tensor_builder->at(input_index)->handle());
+    inputs.emplace_back(_tensor_reg->getAclTensor(input_index)->handle());

  const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(output_index).get()->layout();

  if (axis < 0)
    axis += output_rank;
  axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();

-  auto fn = std::make_unique<::arm_compute::NEStackLayer>();
-
  // Disable applied dim_correction
  for (const auto &input_index : input_indexes)
  {
    size_t input_rank = _ctx.at(input_index).shape().rank();
-    const auto &input_tensor = _tensor_builder->at(input_index);
+    const auto &input_tensor = _tensor_reg->getAclTensor(input_index);
    assert(input_rank == input_tensor->num_dimensions());
    if (input_rank != input_tensor->info()->num_dimensions())
    {
@@ -764,7 +708,7 @@ void KernelGenerator::visit(const ir::operation::Pack &node)
    }
  }

-  fn->configure(inputs, axis, output);
+  auto fn = acl_common::generateLayer<arm_compute::NEStackLayer>(inputs, axis, output);

  // acl_neon doesn't not revert disabling applied dim_correction because acl_neon's kernels would
  // use arm_compute::TensorInfo::offset_element_in_bytes()
@@ -783,8 +727,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
  auto rank = _ctx.at(input_index).shape().rank();
  auto pad_base = _ctx.at(pad_index).data()->base();

-  auto input = _tensor_builder->at(input_index).get()->handle();
-  auto output = _tensor_builder->at(output_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
+  auto output = _tensor_reg->getAclTensor(output_index).get()->handle();

  ::arm_compute::PaddingList padding_list;
  padding_list.resize(rank);
@@ -793,7 +737,7 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
    const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);

    const auto frontend_layout = _current_op_seq_layout;
-    const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+    const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
    const auto axis =
        acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
    padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
@@ -807,19 +751,33 @@ void KernelGenerator::visit(const ir::operation::Pad &node)
  const auto pixel_value =
      ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());

-  auto fn = std::make_unique<::arm_compute::NEPadLayer>();
-  fn->configure(input, output, padding_list, pixel_value);
+  auto fn =
+      acl_common::generateLayer<arm_compute::NEPadLayer>(input, output, padding_list, pixel_value);

  _return_fn = asAclFunction(std::move(fn));
}

+void KernelGenerator::visit(const ir::operation::Pool2D &node)
+{
+  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
+      node, _ctx, _tensor_reg, _current_op_seq_layout,
+      acl_common::convertPoolType(node.param().op_type));
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  const auto activation = node.param().activation;
+  _return_fn = std::make_unique<exec::FunctionSequence>(
+      asAclFunction(std::move(raw_fn)),
+      ActivationBuilder::generate(activation, ofm_tensor->handle()));
+}
+
void KernelGenerator::visit(const ir::operation::Permute &node)
{
  const auto ofm_idx{node.getOutputs().at(0)};
  const auto ifm_idx{node.getInputs().at(0)};
  const auto permute_type = node.getPermuteType();
-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();
  const auto rank = _ctx.at(ofm_idx).shape().rank();
  assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
@@ -830,35 +788,22 @@ void KernelGenerator::visit(const ir::operation::Permute &node)
    // WHCN -> CWHN
    pv = arm_compute::PermutationVector{2, 0, 1};

-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
  }
  else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
  {
    // CWHN -> WHCN
    pv = arm_compute::PermutationVector{1, 2, 0};

-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), pv);
  }
  else
  {
-    auto l = std::make_unique<::arm_compute::NECopy>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NECopy>(ifm_tensor->handle(), ofm_tensor->handle());
  }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}
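Pack above, and Split and Unpack later in this file, convert the frontend axis with acl_common::ToARMComputeAxis before configuring a layer, and the Permute comments ("WHCN -> CWHN") show the same idea: ACL orders tensor dimensions fastest-first, the reverse of the frontend's NHWC/NCHW shape order. A simplified sketch of the reversal, assuming frontend and backend layouts agree; the real helper additionally compensates for NHWC/NCHW mismatches, so this is an illustration, not the library's code:

    #include <cstdint>

    // Simplified stand-in for acl_common::ToARMComputeAxis: with matching
    // layouts, frontend axis i of a rank-r tensor is ACL dimension r - 1 - i.
    inline uint32_t toAclAxisSimplified(uint32_t rank, uint32_t frontend_axis)
    {
      return rank - 1 - frontend_axis; // e.g. rank 4, axis 0 (N) -> ACL dim 3
    }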
void KernelGenerator::visit(const ir::operation::PReLU &node)
@@ -867,21 +812,14 @@
  const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
  const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto alpha_tensor = _tensor_builder->at(alpha_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto alpha_tensor = _tensor_reg->getAclTensor(alpha_index).get();

-  std::unique_ptr<::arm_compute::IFunction> fn;
-
-  auto l = std::make_unique<::arm_compute::NEPReluLayer>();
-
-  l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
-
-  fn = std::move(l);
+  auto fn = acl_common::generateLayer<arm_compute::NEPReluLayer>(
+      ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Reduce &node)
@@ -890,8 +828,8 @@
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();

  // Convert to ACL axes taking into account negative values and possible duplicates.
  const auto &axes = _ctx.at(axes_index);
@@ -906,93 +844,21 @@
  std::unique_ptr<::arm_compute::IFunction> fn;
  if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
-    auto l = std::make_unique<::arm_compute::NEReduceMean>();
-
-    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEReduceMean>(input_tensor->handle(), reduce_axes,
+                                                              keep_dims, output_tensor->handle());
  }
  else if (reduce_type == ir::operation::Reduce::ReduceType::SUM)
  {
-    auto l = std::make_unique<::arm_compute::NEReduceSum>();
-
-    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEReduceSum>(input_tensor->handle(), reduce_axes,
+                                                             keep_dims, output_tensor->handle());
  }
  else
  {
-    auto l = std::make_unique<::arm_compute::NEReduceOperation>();
-
-    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
-                 acl_common::convertReduceType(reduce_type));
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEReduceOperation>(
+        input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
+        acl_common::convertReduceType(reduce_type));
  }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Reshape &node)
@@ -1000,8 +866,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();

  // NOTE This operation must not be changed the layout from frontend to backend
  // So, PermutationOperationPass makes layouts of frontend and backend the same.
@@ -1012,13 +878,10 @@ void KernelGenerator::visit(const ir::operation::Reshape &node)
  UNUSED_RELEASE(frontend_layout);
  UNUSED_RELEASE(backend_layout);

-  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
@@ -1027,18 +890,15 @@
  const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEScale>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();

-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
-                ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
-                ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
+  auto fn = acl_common::generateLayer<arm_compute::NEScale>(
+      ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::InterpolationPolicy::BILINEAR,
+      ::arm_compute::BorderMode::REPLICATE, ::arm_compute::PixelValue(0.f),
+      ::arm_compute::SamplingPolicy::TOP_LEFT);

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::RNN &node)
@@ -1056,40 +916,24 @@
  const auto activation = node.param().activation;

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto hidden_state_out_tensor = _tensor_reg->getAclTensor(hidden_state_out_index).get();

-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto weights_tensor = _tensor_builder->at(weights_index).get();
-  auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
-  auto bias_tensor = _tensor_builder->at(bias_index).get();
-  auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto weights_tensor = _tensor_reg->getAclTensor(weights_index).get();
+  auto recurrent_weights_tensor = _tensor_reg->getAclTensor(recurrent_weights_index).get();
+  auto bias_tensor = _tensor_reg->getAclTensor(bias_index).get();
+  auto hidden_state_in_tensor = _tensor_reg->getAclTensor(hidden_state_in_index).get();

  auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);

-  auto copy_layer = std::make_unique<::arm_compute::NECopy>();
-  copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
+  auto copy_layer = acl_common::generateLayer<arm_compute::NECopy>(
+      hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
  _return_fn = asAclFunction(std::move(copy_layer));

-  auto fn = std::make_unique<::arm_compute::NERNNLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-  fn->configure(input_tensor->handle(), weights_tensor->handle(),
-                recurrent_weights_tensor->handle(), bias_tensor->handle(),
-                hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
-  _return_fn = asAclFunction(std::move(fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();
-
-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
+  auto fn = acl_common::generateLayer<arm_compute::NERNNLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      weights_tensor->handle(), recurrent_weights_tensor->handle(), bias_tensor->handle(),
+      hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
  _return_fn = asAclFunction(std::move(fn));
}

@@ -1105,32 +949,11 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node)
  (void)dims;
  (void)ndim;

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-  auto acl_fn = asAclFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<arm_compute::NEActivationLayer>();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Softmax &node)
@@ -1139,8 +962,8 @@
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
  const auto beta = node.param().beta;

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = input_tensor->layout();
@@ -1154,14 +977,11 @@
    acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
  }

-  auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
-      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
+  auto fn = acl_common::generateLayer<arm_compute::NESoftmaxLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager(), input_tensor->handle(),
+      output_tensor->handle(), beta);

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
@@ -1172,22 +992,19 @@
      node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
  const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
-  auto paddings_tensor = _tensor_builder->at(paddings_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto block_size_tensor = _tensor_reg->getAclTensor(block_size_index).get();
+  auto paddings_tensor = _tensor_reg->getAclTensor(paddings_index).get();

  assert(_ctx.at(block_size_index).data());
  assert(_ctx.at(paddings_index).data());

-  auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();
-
-  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
-                ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToBatchLayer>(
+      ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
+      ofm_tensor->handle());

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
@@ -1197,16 +1014,13 @@
  auto block_size = node.param().block_size;

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();

-  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
+  auto fn = acl_common::generateLayer<arm_compute::NESpaceToDepthLayer>(
+      ifm_tensor->handle(), ofm_tensor->handle(), block_size);

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Split &node)
@@ -1221,10 +1035,10 @@
  for (const auto &output : node.getOutputs())
    output_indexes.emplace_back(output);

-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
  std::vector<arm_compute::ITensor *> output_tensors;
  for (const auto &ofm_ind : output_indexes)
-    output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
+    output_tensors.emplace_back(_tensor_reg->getAclTensor(ofm_ind).get()->handle());

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = ifm_tensor->layout();
@@ -1233,71 +1047,26 @@
    axis += ifm_rank;
  axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();

-  auto fn = std::make_unique<::arm_compute::NESplit>();
-
-  fn->configure(ifm_tensor->handle(), output_tensors, axis);
+  auto fn =
+      acl_common::generateLayer<arm_compute::NESplit>(ifm_tensor->handle(), output_tensors, axis);

  _return_fn = asAclFunction(std::move(fn));
}

-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  const ::arm_compute::ActivationLayerInfo act_info{
-      ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
-  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto lhs_tensor = _tensor_reg->getAclTensor(lhs_index).get();
+  auto rhs_tensor = _tensor_reg->getAclTensor(rhs_index).get();

-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseSquaredDiff>(
+      lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Slice &node)
@@ -1307,8 +1076,8 @@
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = inputData_tensor->layout();
@@ -1358,13 +1127,10 @@
    ends_set.set(i, ends[i]);
  }

-  auto fn = std::make_unique<::arm_compute::NESlice>();
-
-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
+  auto fn = acl_common::generateLayer<arm_compute::NESlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::StridedSlice &node)
@@ -1375,8 +1141,8 @@
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

-  auto outputData_tensor = _tensor_builder->at(output_index).get();
-  auto inputData_tensor = _tensor_builder->at(input_index).get();
+  auto outputData_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto inputData_tensor = _tensor_reg->getAclTensor(input_index).get();

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = inputData_tensor->layout();
@@ -1445,14 +1211,11 @@
    strides_set.set(i, strides[i]);
  }

-  auto fn = std::make_unique<::arm_compute::NEStridedSlice>();
+  auto fn = acl_common::generateLayer<arm_compute::NEStridedSlice>(
+      inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set, strides_set,
+      begin_mask, end_mask, shrink_axis_mask);

-  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
-                strides_set, begin_mask, end_mask, shrink_axis_mask);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::TransposeConv &node)
@@ -1481,20 +1244,17 @@
    invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
  }

-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
-  auto ker_tensor = _tensor_builder->at(ker_index).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getAclTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getAclTensor(ker_index).get();

  const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);

-  auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NETransposeConvLayer>(
+      ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(), tconv_info,
+      invalid_horizontal, invalid_vertical);

-  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
-                tconv_info, invalid_horizontal, invalid_vertical);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Transpose &node)
@@ -1503,8 +1263,8 @@
  const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto &perm{node.param().perm};

-  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
-  const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
+  auto ofm_tensor = _tensor_reg->getAclTensor(ofm_idx).get();
+  const auto ifm_tensor = _tensor_reg->getAclTensor(ifm_idx).get();

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = ifm_tensor->layout();
@@ -1514,27 +1274,17 @@
      rank, pv, frontend_layout, backend_layout);

  std::unique_ptr<::arm_compute::IFunction> fn;
-
  if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
  {
-    auto l = std::make_unique<::arm_compute::NETranspose>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle());
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NETranspose>(ifm_tensor->handle(),
+                                                             ofm_tensor->handle());
  }
  else
  {
-    auto l = std::make_unique<::arm_compute::NEPermute>();
-
-    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
-
-    fn = std::move(l);
+    fn = acl_common::generateLayer<arm_compute::NEPermute>(ifm_tensor->handle(),
+                                                           ofm_tensor->handle(), backend_pv);
  }
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Unpack &node)
@@ -1548,25 +1298,23 @@
  for (const auto &output_index : node.getOutputs())
    output_indexes.emplace_back(output_index);

-  auto input = _tensor_builder->at(input_index).get()->handle();
+  auto input = _tensor_reg->getAclTensor(input_index).get()->handle();
  std::vector<arm_compute::ITensor *> outputs;
  for (const auto &output_index : output_indexes)
-    outputs.emplace_back(_tensor_builder->at(output_index)->handle());
+    outputs.emplace_back(_tensor_reg->getAclTensor(output_index)->handle());

  const auto frontend_layout = _current_op_seq_layout;
-  const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
+  const auto backend_layout = _tensor_reg->getAclTensor(input_index).get()->layout();
  if (axis < 0)
    axis += input_rank;
  axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();

-  auto fn = std::make_unique<::arm_compute::NEUnstack>();
-
  // Disable applied dim_correction
  std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
  for (const auto &output_index : output_indexes)
  {
    size_t output_rank = _ctx.at(output_index).shape().rank();
-    const auto &output_tensor = _tensor_builder->at(output_index);
+    const auto &output_tensor = _tensor_reg->getAclTensor(output_index);
    orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
    assert(output_rank == output_tensor->num_dimensions());
    if (output_rank != output_tensor->info()->num_dimensions())
    {
@@ -1577,84 +1325,23 @@
    }
  }

-  fn->configure(input, outputs, axis);
+  auto fn = acl_common::generateLayer<arm_compute::NEUnstack>(input, outputs, axis);

  _return_fn = asAclFunction(std::move(fn));
}

-void KernelGenerator::visit(const ir::operation::Add &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
-                arm_compute::ConvertPolicy::SATURATE);
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
-  const auto activation = node.param().activation;
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  _return_fn = std::make_unique<exec::FunctionSequence>(
-      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEExpLayer>();
-
-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input_tensor = _tensor_builder->at(input_index).get();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input_tensor = _tensor_reg->getAclTensor(input_index).get();

-  auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();
+  auto fn = acl_common::generateLayer<arm_compute::NEReshapeLayer>(input_tensor->handle(),
+                                                                   output_tensor->handle());

-  fn->configure(input_tensor->handle(), output_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::Comparison &node)
@@ -1665,56 +1352,15 @@
  const auto comparison_type = node.param().comparison_type;

-  auto output_tensor = _tensor_builder->at(output_index).get();
-  auto input0_tensor = _tensor_builder->at(input0_index).get();
-  auto input1_tensor = _tensor_builder->at(input1_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();
-
-  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
-                (arm_compute::ComparisonOperation)comparison_type);
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();
+  auto output_tensor = _tensor_reg->getAclTensor(output_index).get();
+  auto input0_tensor = _tensor_reg->getAclTensor(input0_index).get();
+  auto input1_tensor = _tensor_reg->getAclTensor(input1_index).get();

-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
+  auto fn = acl_common::generateLayer<arm_compute::NEElementwiseComparison>(
+      input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
+      (arm_compute::ComparisonOperation)comparison_type);

-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
-  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
-  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
-  auto rhs_tensor = _tensor_builder->at(rhs_index).get();
-
-  auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();
-
-  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
-
-  auto acl_fn = asAclFunction(std::move(fn));
-
-  _return_fn = std::move(acl_fn);
+  _return_fn = asAclFunction(std::move(fn));
}

void KernelGenerator::visit(const ir::operation::OneHot &node)
@@ -1726,17 +1372,16 @@
  const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
  const auto axis = node.param().axis;

-  auto output_tensor = _tensor_builder->at(out_idx).get();
-  auto indices_tensor = _tensor_builder->at(indices_idx).get();
-  auto depth_tensor = _tensor_builder->at(depth_idx).get();
-  auto onvalue_tensor = _tensor_builder->at(onvalue_idx).get();
-  auto offvalue_tensor = _tensor_builder->at(offvalue_idx).get();
-
-  auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
-  fn->configure(indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
-                offvalue_tensor->handle(), output_tensor->handle(), axis);
-  auto acl_fn = asAclFunction(std::move(fn));
-  _return_fn = std::move(acl_fn);
+  auto output_tensor = _tensor_reg->getAclTensor(out_idx).get();
+  auto indices_tensor = _tensor_reg->getAclTensor(indices_idx).get();
+  auto depth_tensor = _tensor_reg->getAclTensor(depth_idx).get();
+  auto onvalue_tensor = _tensor_reg->getAclTensor(onvalue_idx).get();
+  auto offvalue_tensor = _tensor_reg->getAclTensor(offvalue_idx).get();
+
+  auto fn = acl_common::generateLayer<arm_compute::CPPOneHotEx>(
+      indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
+      offvalue_tensor->handle(), output_tensor->handle(), axis);
+  _return_fn = asAclFunction(std::move(fn));
}

} // namespace acl_neon
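Every _tensor_builder->at(index) lookup in the file above became _tensor_reg->getAclTensor(index): kernel generation now resolves tensors through a registry, and the builder is left responsible for allocation only. A rough, self-contained outline of that lookup-only role; the real class is acl_common::AclTensorRegistry, templated over the tensor manager and implementing ITensorRegistry, so the names and members here are assumptions:

    // Hypothetical outline of the lookup-only registry used above.
    template <typename TensorManager>
    class TensorRegistrySketch
    {
    public:
      explicit TensorRegistrySketch(TensorManager *mgr) : _mgr{mgr} {}

      // What the visitors call: resolve an operand index to a backend tensor.
      template <typename Index> auto getAclTensor(const Index &ind) { return _mgr->at(ind); }

    private:
      TensorManager *_mgr; // the manager owns and allocates; the registry only resolves
    };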
diff --git a/runtime/onert/backend/acl_neon/KernelGenerator.h b/runtime/onert/backend/acl_neon/KernelGenerator.h
index d6f7932b7..4d269cde5 100644
--- a/runtime/onert/backend/acl_neon/KernelGenerator.h
+++ b/runtime/onert/backend/acl_neon/KernelGenerator.h
@@ -21,6 +21,8 @@
 #include "ir/Operands.h"
 #include "TensorBuilder.h"
+#include "AclTensorRegistry.h"
+#include "TensorManager.h"

namespace onert
{
@@ -33,75 +35,57 @@ class KernelGenerator : public IKernelGenerator
{
public:
  KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
-                  const std::shared_ptr<TensorBuilder> &tensor_builder);
+                  const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> &_tensor_reg);

  void visit(const ir::OpSequence &) override;
-  void visit(const ir::operation::Abs &) override;
  void visit(const ir::operation::ArgMax &) override;
  void visit(const ir::operation::BatchToSpaceND &) override;
-  void visit(const ir::operation::Cast &) override;
+  void visit(const ir::operation::BinaryArithmetic &) override;
  void visit(const ir::operation::Conv2D &) override;
  void visit(const ir::operation::DepthToSpace &) override;
  void visit(const ir::operation::DepthwiseConv2D &) override;
-  void visit(const ir::operation::Dequantize &) override;
-  void visit(const ir::operation::MaxPool2D &) override;
-  void visit(const ir::operation::AvgPool2D &) override;
  void visit(const ir::operation::Concat &) override;
+  void visit(const ir::operation::ElementwiseActivation &) override;
+  void visit(const ir::operation::ElementwiseBinary &) override;
+  void visit(const ir::operation::ElementwiseUnary &) override;
  void visit(const ir::operation::EmbeddingLookup &) override;
-  void visit(const ir::operation::Floor &) override;
  void visit(const ir::operation::FullyConnected &) override;
  void visit(const ir::operation::Gather &) override;
  void visit(const ir::operation::HashtableLookup &) override;
  void visit(const ir::operation::InstanceNorm &) override;
  void visit(const ir::operation::L2Normalization &) override;
-  void visit(const ir::operation::L2Pool2D &) override;
  void visit(const ir::operation::LocalResponseNormalization &) override;
-  void visit(const ir::operation::LogicalAnd &) override;
-  void visit(const ir::operation::LogicalNot &) override;
-  void visit(const ir::operation::LogicalOr &) override;
-  void visit(const ir::operation::Logistic &) override;
  void visit(const ir::operation::LSTM &) override;
-  void visit(const ir::operation::Mul &) override;
-  void visit(const ir::operation::Neg &) override;
  void visit(const ir::operation::Pack &) override;
  void visit(const ir::operation::Pad &) override;
+  void visit(const ir::operation::Pool2D &) override;
  void visit(const ir::operation::Permute &) override;
  void visit(const ir::operation::PReLU &) override;
  void visit(const ir::operation::Reduce &) override;
-  void visit(const ir::operation::ReLU &) override;
-  void visit(const ir::operation::ReLU1 &) override;
-  void visit(const ir::operation::ReLU6 &) override;
  void visit(const ir::operation::Reshape &) override;
  void visit(const ir::operation::ResizeBilinear &) override;
  void visit(const ir::operation::RNN &) override;
-  void visit(const ir::operation::RSQRT &) override;
  void visit(const ir::operation::Squeeze &) override;
-  void visit(const ir::operation::Tanh &) override;
  void visit(const ir::operation::Softmax &) override;
  void visit(const ir::operation::SpaceToBatchND &) override;
  void visit(const ir::operation::SpaceToDepth &) override;
  void visit(const ir::operation::Split &) override;
-  void visit(const ir::operation::SQRT &) override;
  void visit(const ir::operation::SquaredDifference &) override;
-  void visit(const ir::operation::Sub &) override;
  void visit(const ir::operation::Slice &) override;
  void visit(const ir::operation::StridedSlice &) override;
  void visit(const ir::operation::TransposeConv &) override;
  void visit(const ir::operation::Transpose &) override;
  void visit(const ir::operation::Unpack &) override;
-  void visit(const ir::operation::Add &) override;
-  void visit(const ir::operation::Div &) override;
-  void visit(const ir::operation::Exp &) override;
  void visit(const ir::operation::ExpandDims &) override;
  void visit(const ir::operation::Comparison &) override;
-  void visit(const ir::operation::Min &) override;
-  void visit(const ir::operation::Max &) override;
  void visit(const ir::operation::OneHot &) override;

private:
  const ir::Operands &_ctx;
  const ir::Operations &_operations_ctx;
  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<acl_common::AclTensorRegistry<TensorManager>> _tensor_reg;
  ir::Layout _current_op_seq_layout;
};

diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc
index 2948cab09..ac80901cc 100644
--- a/runtime/onert/backend/acl_neon/Optimizer.cc
+++ b/runtime/onert/backend/acl_neon/Optimizer.cc
@@ -19,7 +19,7 @@
 #include "ParentInfo.h"

 #include <cassert>
-#include <ir/LoweredGraph.h>
+#include <compiler/LoweredGraph.h>
 #include <util/logging.h>
 #include "AclSubTensorAnalyzer.h"

diff --git a/runtime/onert/backend/acl_neon/TensorManager.h b/runtime/onert/backend/acl_neon/TensorManager.h
index 3ec9efa8f..3b7cfbcfd 100644
--- a/runtime/onert/backend/acl_neon/TensorManager.h
+++ b/runtime/onert/backend/acl_neon/TensorManager.h
@@ -55,7 +55,7 @@ using InternalBufferManager = acl_common::AclInternalBufferManager<
using TensorManager = acl_common::AclTensorManager<acl_neon::operand::INETensor, operand::NETensor,
                                                   operand::NESubTensor>;

-TensorManager *createTensorManager(bool is_linear_executor)
+inline TensorManager *createTensorManager(bool is_linear_executor)
{
  if (is_linear_executor)
  {

diff --git a/runtime/onert/backend/cpu/Backend.h b/runtime/onert/backend/cpu/Backend.h
index 56bd352e0..fc8574b26 100644
--- a/runtime/onert/backend/cpu/Backend.h
+++ b/runtime/onert/backend/cpu/Backend.h
@@ -47,10 +47,12 @@ public:
    const auto &operands = graph.operands();
    const auto &operations = graph.operations();
    auto context = std::make_unique<BackendContext>(this, &graph);
-    auto tb = std::make_shared<TensorBuilder>();
+    auto tr = std::make_shared<cpu_common::TensorRegistry>();
+    auto tb = std::make_shared<TensorBuilder>(tr);
+    context->tensor_registry = tr;
    context->tensor_builder = tb;
-    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb);
-    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, kb,
+    context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr);
+    context->kernel_gen = std::make_shared<KernelGenerator>(operands, operations, tb, tr, kb,
                                                            context->external_context());
    context->tensor_register = nullptr;
    context->optimizer = nullptr;
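One small but load-bearing change sits in the TensorManager.h hunk above: createTensorManager gained inline because it is a function defined in a header. A brief illustration of why (general C++ ODR behavior, not code from this patch):

    // TensorManager.h is included from more than one translation unit.
    // Without 'inline', each inclusion emits an external definition and
    // the link step fails:
    //
    //   // a.cc                        // b.cc
    //   #include "TensorManager.h"     #include "TensorManager.h"
    //
    //   -> multiple definition of 'createTensorManager(bool)'
    //
    // 'inline' permits identical definitions in every translation unit,
    // which is exactly what the one-definition rule requires here.

The cpu/Backend.h hunk shows the same registry theme as the acl_neon changes: a single cpu_common::TensorRegistry is created and the identical instance is handed to the TensorBuilder, ConstantInitializer, and KernelGenerator, so all three agree on the operand-index-to-tensor mapping.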
diff --git a/runtime/onert/backend/cpu/BackendContext.h b/runtime/onert/backend/cpu/BackendContext.h
index f314a8e39..e90b21054 100644
--- a/runtime/onert/backend/cpu/BackendContext.h
+++ b/runtime/onert/backend/cpu/BackendContext.h
@@ -31,13 +31,15 @@ class BackendContext : public onert::backend::BackendContext
{
public:
  BackendContext(const Backend *backend, const ir::Graph *graph,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
                 std::shared_ptr<ITensorBuilder> tensor_builder = nullptr,
                 std::shared_ptr<IConstantInitializer> constant_initializer = nullptr,
                 std::shared_ptr<IKernelGenerator> kernel_gen = nullptr,
                 std::shared_ptr<ITensorRegister> tensor_register = nullptr,
                 std::shared_ptr<IOptimizer> optimizer = nullptr)
-      : onert::backend::BackendContext(backend, graph, tensor_builder, constant_initializer,
-                                       kernel_gen, tensor_register, optimizer),
+      : onert::backend::BackendContext(backend, graph, tensor_registry, tensor_builder,
+                                       constant_initializer, kernel_gen, tensor_register,
+                                       optimizer),
        _external_context(new ExternalContext)
  {
  }

diff --git a/runtime/onert/backend/cpu/ConstantInitializer.cc b/runtime/onert/backend/cpu/ConstantInitializer.cc
index deb27f0fe..6f6eb77bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.cc
+++ b/runtime/onert/backend/cpu/ConstantInitializer.cc
@@ -25,8 +25,8 @@ namespace cpu
{

ConstantInitializer::ConstantInitializer(const ir::Operands &operands,
-                                         const std::shared_ptr<TensorBuilder> &tensor_builder)
-    : IConstantInitializer{operands}, _tensor_builder{tensor_builder}
+                                         const std::shared_ptr<ITensorRegistry> &tensor_reg)
+    : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{
  // DO NOTHING
}

diff --git a/runtime/onert/backend/cpu/ConstantInitializer.h b/runtime/onert/backend/cpu/ConstantInitializer.h
index de03a693a..c016c83bc 100644
--- a/runtime/onert/backend/cpu/ConstantInitializer.h
+++ b/runtime/onert/backend/cpu/ConstantInitializer.h
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_CPU_CONSTANT_INITIALIZER_H__

-#include "TensorBuilder.h"
+#include "backend/cpu_common/TensorRegistry.h"

 #include <backend/IConstantInitializer.h>
 #include <ir/Operands.h>
@@ -33,7 +33,7 @@ class ConstantInitializer : public IConstantInitializer
{
public:
  ConstantInitializer(const ir::Operands &operands,
-                      const std::shared_ptr<TensorBuilder> &tensor_builder);
+                      const std::shared_ptr<ITensorRegistry> &tensor_reg);

public:
  void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) override;
@@ -50,10 +50,10 @@ public:
  void visit(const ir::operation::FullyConnected &) override;

private:
-  std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; }
+  std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }

private:
-  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<ITensorRegistry> _tensor_reg;
};

} // namespace cpu

diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
index 7939fe894..74b6f0c6b 100644
--- a/runtime/onert/backend/cpu/KernelGenerator.cc
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -16,49 +16,36 @@
 #include "KernelGenerator.h"

-#include "ops/AbsLayer.h"
-#include "ops/AddLayer.h"
 #include "ops/ArgMinMaxLayer.h"
-#include "ops/AvgPoolLayer.h"
 #include "ops/BatchToSpaceNDLayer.h"
-#include "ops/CastLayer.h"
+#include "ops/BinaryArithmeticLayer.h"
 #include "ops/CompareLayer.h"
 #include "ops/ConcatLayer.h"
 #include "ops/ConvolutionLayer.h"
-#include "ops/CosLayer.h"
 #include "ops/DepthwiseConvolutionLayer.h"
-#include "ops/DivLayer.h"
 #include "ops/EinsumLayer.h"
-#include "ops/ExpLayer.h"
+#include "ops/ElementwiseActivationLayer.h"
+#include "ops/ElementwiseBinaryLayer.h"
+#include "ops/ElementwiseUnaryLayer.h"
 #include "ops/ExpandDimsLayer.h"
 #include "ops/FillLayer.h"
 #include "ops/FullyConnectedLayer.h"
 #include "ops/GatherLayer.h"
-#include "ops/LogLayer.h"
-#include "ops/LogisticLayer.h"
-#include "ops/MaxLayer.h"
-#include "ops/MaxPoolLayer.h"
 #include "ops/MeanLayer.h"
-#include "ops/MinLayer.h"
-#include "ops/MulLayer.h"
-#include "ops/NegLayer.h"
 #include "ops/OneHotLayer.h"
 #include "ops/OperationUtils.h"
 #include "ops/PackLayer.h"
 #include "ops/PadLayer.h"
+#include "ops/PoolLayer.h"
 #include "ops/PowLayer.h"
 #include "ops/RangeLayer.h"
+#include "ops/RankLayer.h"
 #include "ops/ReduceLayer.h"
-#include "ops/ReLULayer.h"
-#include "ops/ReLU6Layer.h"
 #include "ops/ReshapeLayer.h"
 #include "ops/ResizeBilinearLayer.h"
 #include "ops/ReverseLayer.h"
-#include "ops/RoundLayer.h"
-#include "ops/RsqrtLayer.h"
 #include "ops/SelectLayer.h"
 #include "ops/ShapeLayer.h"
-#include "ops/SinLayer.h"
 #include "ops/SliceLayer.h"
 #include "ops/SoftMaxLayer.h"
 #include "ops/StridedSliceLayer.h"
@@ -66,22 +53,16 @@
 #include "ops/SpaceToDepthLayer.h"
 #include "ops/SplitLayer.h"
 #include "ops/SplitVLayer.h"
-#include "ops/SubLayer.h"
-#include "ops/TanhLayer.h"
 #include "ops/TileLayer.h"
 #include "ops/TransposeLayer.h"
 #include "ops/UnpackLayer.h"
-#include "ops/LogicalNotLayer.h"
-#include "ops/ZerosLikeLayer.h"
 #include "ops/SquaredDiffLayer.h"
-#include "ops/LogicalOrLayer.h"
 #include "ops/L2NormLayer.h"
 #include "ops/MatrixBandPartLayer.h"
 #include "ops/BatchMatMulLayer.h"
 #include "ops/BroadcastToLayer.h"
 #include "ops/FusedBatchNormLayer.h"
 #include "ops/LogSoftMaxLayer.h"
-#include "ops/QuantizeLayer.h"
 #include "ops/StatelessRandomUniformLayer.h"

 #include <backend/Backend.h>
@@ -102,6 +83,104 @@ namespace cpu
namespace
{

+ops::ArithmeticType
+convertArithmeticType(ir::operation::BinaryArithmetic::ArithmeticType arithmetic_type_ir)
+{
+  switch (arithmetic_type_ir)
+  {
+    case ir::operation::BinaryArithmetic::ArithmeticType::ADD:
+      return ops::ArithmeticType::kAdd;
+    case ir::operation::BinaryArithmetic::ArithmeticType::SUB:
+      return ops::ArithmeticType::kSub;
+    case ir::operation::BinaryArithmetic::ArithmeticType::MUL:
+      return ops::ArithmeticType::kMul;
+    case ir::operation::BinaryArithmetic::ArithmeticType::DIV:
+      return ops::ArithmeticType::kDiv;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseActivationType
+convertElementwiseActivationType(ir::operation::ElementwiseActivation::Type type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseActivation::Type::LOGISTIC:
+      return ops::ElementwiseActivationType::kLogistic;
+    case ir::operation::ElementwiseActivation::Type::RELU:
+      return ops::ElementwiseActivationType::kReLU;
+    case ir::operation::ElementwiseActivation::Type::TANH:
+      return ops::ElementwiseActivationType::kTanh;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseBinaryType
+convertElementwiseBinaryType(ir::operation::ElementwiseBinary::ElementwiseBinaryType type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR:
+      return ops::ElementwiseBinaryType::kLogicalOr;
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX:
+      return ops::ElementwiseBinaryType::kMax;
+    case ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN:
+      return ops::ElementwiseBinaryType::kMin;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::ElementwiseUnaryType convertElementwiseUnaryType(ir::operation::ElementwiseUnary::Type type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::ElementwiseUnary::Type::ABS:
+      return ops::ElementwiseUnaryType::kAbs;
+    case ir::operation::ElementwiseUnary::Type::CAST:
+      return ops::ElementwiseUnaryType::kCast;
+    case ir::operation::ElementwiseUnary::Type::COS:
+      return ops::ElementwiseUnaryType::kCos;
+    case ir::operation::ElementwiseUnary::Type::ERF:
+      return ops::ElementwiseUnaryType::kErf;
+    case ir::operation::ElementwiseUnary::Type::EXP:
+      return ops::ElementwiseUnaryType::kExp;
+    case ir::operation::ElementwiseUnary::Type::LOG:
+      return ops::ElementwiseUnaryType::kLog;
+    case ir::operation::ElementwiseUnary::Type::LOGICAL_NOT:
+      return ops::ElementwiseUnaryType::kLogicalNot;
+    case ir::operation::ElementwiseUnary::Type::NEG:
+      return ops::ElementwiseUnaryType::kNeg;
+    case ir::operation::ElementwiseUnary::Type::QUANTIZE:
+      return ops::ElementwiseUnaryType::kQuantize;
+    case ir::operation::ElementwiseUnary::Type::ROUND:
+      return ops::ElementwiseUnaryType::kRound;
+    case ir::operation::ElementwiseUnary::Type::RSQRT:
+      return ops::ElementwiseUnaryType::kRSqrt;
+    case ir::operation::ElementwiseUnary::Type::SIN:
+      return ops::ElementwiseUnaryType::kSin;
+    case ir::operation::ElementwiseUnary::Type::ZEROS_LIKE:
+      return ops::ElementwiseUnaryType::kZerosLike;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
+ops::PoolType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+{
+  switch (type_ir)
+  {
+    case ir::operation::Pool2D::PoolType::AVG:
+      return ops::PoolType::kAvg;
+    case ir::operation::Pool2D::PoolType::MAX:
+      return ops::PoolType::kMax;
+    default:
+      throw std::runtime_error("cpu KernelGenerator : Not supported operation yet");
+  }
+}
+
ops::ReduceType convertReduceType(ir::operation::Reduce::ReduceType reduce_type_ir)
{
  switch (reduce_type_ir)
@@ -127,11 +206,12 @@
KernelGenerator::KernelGenerator(
    const ir::Operands &operands_ctx, const ir::Operations &operations_ctx,
    const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg,
    const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder,
    const std::shared_ptr<ExternalContext> &external_context)
    : _ctx(operands_ctx), _operations_ctx{operations_ctx}, _tensor_builder(tensor_builder),
-      _kernel_builder(kernel_builder), _current_op_seq_layout(ir::Layout::UNKNOWN),
-      _external_context(external_context)
+      _tensor_reg{tensor_reg}, _kernel_builder(kernel_builder),
+      _current_op_seq_layout(ir::Layout::UNKNOWN), _external_context(external_context)
{
  // DO NOTHING
}
@@ -140,11 +220,9 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
  assert(!_return_fn_seq);
  assert(_tensor_builder->dynamicTensorManager());
-  assert(_tensor_builder->tensorRegistry());
+  assert(_tensor_reg);

-  auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager();
-  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(
-      _ctx, dyn_tensor_manager, _tensor_builder->tensorRegistry());
+  auto dyn_shape_inferer = std::make_shared<exec::DynamicShapeInferer>(_ctx, _tensor_reg);

  _return_fn_seq = std::make_unique<exec::FunctionSequence>();
@@ -154,7 +232,7 @@
    dyn_ctx->op_seq = &op_seq;
    dyn_ctx->operations = &_operations_ctx;
    dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer);
-    dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry();
+    dyn_ctx->tensor_registry = _tensor_reg;
    dyn_ctx->dynamic_tensor_manager = _tensor_builder->dynamicTensorManager();

    _return_fn_seq->dynamic_tensor_ctx(dyn_ctx);
@@ -170,13 +248,13 @@
  for (const auto &ind : (node.getInputs() | ir::Remove::UNDEFINED) + node.getOutputs())
  {
-    auto portable_tensor = _tensor_builder->portableAt(ind);
+    auto portable_tensor = _tensor_reg->getPortableTensor(ind);
    if (portable_tensor)
    {
      assert(portable_tensor->layout() == ir::Layout::NHWC);
    }
-    auto tensor = _tensor_builder->at(ind);
+    auto tensor = _tensor_reg->getNativeTensor(ind);
    if (tensor)
    {
      tensor->increase_ref();
@@ -194,21 +272,23 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

-  auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get();
-  auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get();
-  auto ker_tensor = _tensor_builder->portableAt(ker_index).get();
-  auto bias_tensor = _tensor_builder->portableAt(bias_index).get();
+  auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get();
+  auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get();
+  auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get();
+  auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get();

  const auto stride = node.param().stride;
  const auto activation = node.param().activation;
  const auto param_padding = node.param().padding;
+  const auto dilation = node.param().dilation;
  auto fn = std::make_unique<ops::ConvolutionLayer>();

  if (_ctx.at(ifm_index).info().isDynamic() || _ctx.at(ker_index).info().isDynamic())
  {
    fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, param_padding.param.left,
                  param_padding.param.right, param_padding.param.top, param_padding.param.bottom,
-                  stride.horizontal, stride.vertical, activation, ofm_tensor);
+                  stride.horizontal, stride.vertical, dilation.width_factor, dilation.height_factor,
+                  activation, ofm_tensor);

    _return_fn = std::move(fn);
    return;
@@ -221,11 +301,12 @@
  const auto ker_width = ker_shape.dim(2);

  const auto padding =
-      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height);
+      ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height,
+                           dilation.width_factor, dilation.height_factor);

  fn->configure(ifm_tensor, ker_tensor, bias_tensor, param_padding.type, padding.left,
                padding.right, padding.top, padding.bottom, stride.horizontal, stride.vertical,
-                activation, ofm_tensor);
+                dilation.width_factor, dilation.height_factor, activation, ofm_tensor);

  _return_fn = std::move(fn);
}
_tensor_builder->portableAt(bias_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); + auto ker_tensor = _tensor_reg->getPortableTensor(ker_index).get(); + auto bias_tensor = _tensor_reg->getPortableTensor(bias_index).get(); auto fn = std::make_unique<ops::DepthwiseConvolutionLayer>(); @@ -265,57 +346,6 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::MaxPool2D &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)}; - - const auto kh = node.param().kh; - const auto kw = node.param().kw; - - const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::MaxPoolLayer>(); - - fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom, - stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::AvgPool2D &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)}; - - const auto kh = node.param().kh; - const auto kw = node.param().kw; - const auto stride = node.param().stride; - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); - const auto padding = - ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::AvgPoolLayer>(); - - fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom, - stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Concat &node) { const auto ofm_index{node.getOutputs().at(0)}; @@ -323,11 +353,11 @@ void KernelGenerator::visit(const ir::operation::Concat &node) const auto rank = _ctx.at(ofm_index).shape().rank(); const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); auto fn = std::make_unique<ops::ConcatLayer>(); @@ -342,9 +372,9 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) const auto input_index{node.getInputs().at(ir::operation::BatchToSpaceND::INPUT)}; const auto 
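Every hunk in this file makes the same substitution: _tensor_builder->portableAt(ind) becomes _tensor_reg->getPortableTensor(ind), so tensor lookup lives in a registry shared across components instead of being reached through the builder. A simplified stand-in for such a registry (hypothetical and far smaller than cpu_common::TensorRegistry):

#include <cstdint>
#include <memory>
#include <unordered_map>

template <typename TensorT>
class MiniTensorRegistry
{
public:
  void set(uint32_t ind, std::shared_ptr<TensorT> tensor) { _map[ind] = std::move(tensor); }

  // Mirrors getNativeTensor/getPortableTensor: null when nothing is registered.
  std::shared_ptr<TensorT> get(uint32_t ind) const
  {
    auto it = _map.find(ind);
    return it == _map.end() ? nullptr : it->second;
  }

private:
  std::unordered_map<uint32_t, std::shared_ptr<TensorT>> _map;
};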
block_size_index{node.getInputs().at(ir::operation::BatchToSpaceND::BLOCK_SIZE)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); - auto block_size_alloc = _tensor_builder->portableAt(block_size_index).get(); + auto output_alloc = _tensor_reg->getPortableTensor(output_index).get(); + auto input_alloc = _tensor_reg->getPortableTensor(input_index).get(); + auto block_size_alloc = _tensor_reg->getPortableTensor(block_size_index).get(); auto fn = std::make_unique<ops::BatchToSpaceNDLayer>(); @@ -354,7 +384,7 @@ void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node) if (node.getInputs().size() != NNApiInputs) { const auto crops_data_index{node.getInputs().at(ir::operation::BatchToSpaceND::CROPS_DATA)}; - crops_alloc = _tensor_builder->portableAt(crops_data_index).get(); + crops_alloc = _tensor_reg->getPortableTensor(crops_data_index).get(); } fn->configure(input_alloc, output_alloc, block_size_alloc, crops_alloc); @@ -368,9 +398,9 @@ void KernelGenerator::visit(const ir::operation::Fill &node) const auto input_index{node.getInputs().at(ir::operation::Fill::Input::INPUT)}; const auto value_index{node.getInputs().at(ir::operation::Fill::Input::VALUE)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto value_tensor = _tensor_builder->portableAt(value_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto value_tensor = _tensor_reg->getPortableTensor(value_index).get(); auto fn = std::make_unique<ops::FillLayer>(); @@ -389,11 +419,11 @@ void KernelGenerator::visit(const ir::operation::FullyConnected &node) const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)}; const auto activation = node.param().activation; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto weight_tensor = _tensor_builder->portableAt(weight_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto weight_tensor = _tensor_reg->getPortableTensor(weight_index).get(); auto bias_tensor = - bias_index.undefined() ? nullptr : _tensor_builder->portableAt(bias_index).get(); + bias_index.undefined() ? 
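BatchToSpaceND above treats its crops input as optional, and FullyConnected below does the same for its bias: an absent or undefined operand index resolves to nullptr, and the kernel's configure() decides what nullptr means. The idea in isolation, with hypothetical toy types rather than onert's:

#include <cstdio>

struct Tensor { float v; };

// configure() receives nullptr when the model omitted the operand.
void configure(const Tensor *input, const Tensor *weights, const Tensor *bias)
{
  if (bias != nullptr)
    std::printf("apply bias %f\n", bias->v);
  else
    std::printf("no bias operand, skip the add\n");
  (void)input; (void)weights;
}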
nullptr : _tensor_reg->getPortableTensor(bias_index).get(); auto fn = std::make_unique<ops::FullyConnectedLayer>(); @@ -408,8 +438,8 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); // optional 2nd input IPortableTensor *shape_tensor = nullptr; @@ -417,7 +447,7 @@ void KernelGenerator::visit(const ir::operation::Reshape &node) if (node.getInputs().size() == 2) { const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)}; - shape_tensor = _tensor_builder->portableAt(shape_index).get(); + shape_tensor = _tensor_reg->getPortableTensor(shape_index).get(); } auto fn = std::make_unique<ops::ReshapeLayer>(); @@ -431,8 +461,8 @@ void KernelGenerator::visit(const ir::operation::Squeeze &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); // Squeeze can share same kernel with reshape auto fn = std::make_unique<ops::ReshapeLayer>(); @@ -449,8 +479,8 @@ void KernelGenerator::visit(const ir::operation::Softmax &node) const auto beta = node.param().beta; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::SoftMaxLayer>(); @@ -459,21 +489,22 @@ void KernelGenerator::visit(const ir::operation::Softmax &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Add &node) +void KernelGenerator::visit(const ir::operation::BinaryArithmetic &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)}; + const auto lhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)}; const auto activation = node.param().activation; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); - auto fn = std::make_unique<ops::AddLayer>(); + auto fn = std::make_unique<ops::BinaryArithmeticLayer>(); - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); + fn->configure(lhs_tensor, rhs_tensor, ofm_tensor, activation, + convertArithmeticType(node.param().arithmetic_type)); _return_fn = std::move(fn); } @@ -484,9 
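The Add visitor above is rewritten as BinaryArithmetic: one visitor and one BinaryArithmeticLayer, with the concrete operation chosen by an arithmetic-type enum at configure time (Sub, Mul and Div disappear the same way further down). A standalone sketch of the dispatch idea, not the actual layer:

#include <functional>
#include <stdexcept>

enum class ArithmeticType { kAdd, kSub, kMul, kDiv };

// Select the scalar kernel once; the element-wise loop stays identical.
std::function<float(float, float)> pickKernel(ArithmeticType t)
{
  switch (t)
  {
    case ArithmeticType::kAdd: return [](float a, float b) { return a + b; };
    case ArithmeticType::kSub: return [](float a, float b) { return a - b; };
    case ArithmeticType::kMul: return [](float a, float b) { return a * b; };
    case ArithmeticType::kDiv: return [](float a, float b) { return a / b; };
  }
  throw std::runtime_error("unknown arithmetic type");
}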
+515,9 @@ void KernelGenerator::visit(const ir::operation::Comparison &node) const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)}; const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); auto comparison_type = node.param().comparison_type; @@ -503,9 +534,9 @@ void KernelGenerator::visit(const ir::operation::Gather &node) const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)}; const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto indices_tensor = _tensor_builder->portableAt(indices_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get(); const auto backend_layout = output_tensor->layout(); UNUSED_RELEASE(backend_layout); @@ -534,46 +565,6 @@ void KernelGenerator::visit(const ir::operation::Gather &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Sub &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::SubLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Mul &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MulLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::OneHot &node) { const auto output_index{node.getOutputs().at(0)}; @@ -584,11 +575,11 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto indices_tensor = _tensor_builder->portableAt(indices_index).get(); - auto depth_tensor = _tensor_builder->portableAt(depth_index).get(); - auto onvalue_tensor = _tensor_builder->portableAt(onvalue_index).get(); - auto offvalue_tensor = 
_tensor_builder->portableAt(offvalue_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto indices_tensor = _tensor_reg->getPortableTensor(indices_index).get(); + auto depth_tensor = _tensor_reg->getPortableTensor(depth_index).get(); + auto onvalue_tensor = _tensor_reg->getPortableTensor(onvalue_index).get(); + auto offvalue_tensor = _tensor_reg->getPortableTensor(offvalue_index).get(); assert(indices_tensor->data_type() == OperandType::INT32); assert(axis <= static_cast<int>(indices_tensor->num_dimensions())); @@ -600,34 +591,14 @@ void KernelGenerator::visit(const ir::operation::OneHot &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Div &node) -{ - // The same as Add - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)}; - - const auto activation = node.param().activation; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::DivLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, activation, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Einsum &node) { const auto ofm_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); const auto equation = node.param().equation; @@ -648,7 +619,7 @@ void KernelGenerator::visit(const ir::operation::Custom &node) const auto &operand = _ctx.at(idx); // TODO make sure using `_current_op_seq_layout` is correct for custom operations types.emplace_back(custom::TypeInfo{operand.shape(), operand.typeInfo().type()}); - auto in_tensor = _tensor_builder->portableAt(idx); + auto in_tensor = _tensor_reg->getPortableTensor(idx); tensors.emplace_back(in_tensor); } }; @@ -666,64 +637,68 @@ void KernelGenerator::visit(const ir::operation::Custom &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Exp &node) +void KernelGenerator::visit(const ir::operation::ElementwiseActivation &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); - auto fn = std::make_unique<ops::ExpLayer>(); + auto fn = std::make_unique<ops::ElementwiseActivationLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, output_tensor, node.param().alpha, node.param().beta, + convertElementwiseActivationType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ExpandDims &node) +void KernelGenerator::visit(const 
ir::operation::ElementwiseBinary &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; - const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; + const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axis_tensor = _tensor_builder->portableAt(axis_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); - auto fn = std::make_unique<ops::ExpandDimsLayer>(); + auto fn = std::make_unique<ops::ElementwiseBinaryLayer>(); - fn->configure(input_tensor, axis_tensor, output_tensor); + fn->configure(lhs_tensor, rhs_tensor, output_tensor, + convertElementwiseBinaryType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Logistic &node) +void KernelGenerator::visit(const ir::operation::ElementwiseUnary &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); - auto fn = std::make_unique<ops::LogisticLayer>(); + auto fn = std::make_unique<ops::ElementwiseUnaryLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, output_tensor, convertElementwiseUnaryType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Tanh &node) +void KernelGenerator::visit(const ir::operation::ExpandDims &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)}; + const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)}; + const auto axis_index{node.getInputs().at(ir::operation::ExpandDims::Input::AXIS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get(); - auto fn = std::make_unique<ops::TanhLayer>(); + auto fn = std::make_unique<ops::ExpandDimsLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(input_tensor, axis_tensor, output_tensor); _return_fn = std::move(fn); } @@ -737,11 +712,11 @@ void KernelGenerator::visit(const ir::operation::Pack &node) assert(-rank <= axis && axis < rank); - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - 
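Exp and Logistic above stop being their own visitors; together with Tanh, and with Abs, Sin, Cos, RSQRT, Neg, Log, Round, LogicalNot, ZerosLike and Quantize elsewhere in this file, they fold into ElementwiseActivation/ElementwiseUnary layers keyed by an op-type enum. The per-element effect of the unary consolidation, as a self-contained sketch:

#include <cmath>
#include <stdexcept>

enum class ElementwiseUnaryType { kAbs, kExp, kLog, kNeg, kRSqrt };

// One layer, many ops: one switch replaces a dozen near-identical classes.
float applyUnary(ElementwiseUnaryType op, float x)
{
  switch (op)
  {
    case ElementwiseUnaryType::kAbs:   return std::fabs(x);
    case ElementwiseUnaryType::kExp:   return std::exp(x);
    case ElementwiseUnaryType::kLog:   return std::log(x);
    case ElementwiseUnaryType::kNeg:   return -x;
    case ElementwiseUnaryType::kRSqrt: return 1.0f / std::sqrt(x);
  }
  throw std::runtime_error("unknown unary type");
}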
input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); auto fn = std::make_unique<ops::PackLayer>(); @@ -759,11 +734,11 @@ void KernelGenerator::visit(const ir::operation::Unpack &node) assert(rank == 0 || (-rank <= axis && axis < rank)); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); std::vector<IPortableTensor *> output_tensors; for (auto &output_idx : node.getOutputs()) - output_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + output_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::UnpackLayer>(); @@ -781,8 +756,8 @@ void KernelGenerator::visit(const ir::operation::Pad &node) const auto output_index{node.getOutputs().at(0)}; assert(_ctx.at(pad_index).data()); - auto input = _tensor_builder->portableAt(input_index).get(); - auto output = _tensor_builder->portableAt(output_index).get(); + auto input = _tensor_reg->getPortableTensor(input_index).get(); + auto output = _tensor_reg->getPortableTensor(output_index).get(); auto pad_rank = _ctx.at(pad_index).shape().dim(0); auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base()); @@ -801,62 +776,13 @@ void KernelGenerator::visit(const ir::operation::Pad &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Max &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MaxLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Min &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)}; - const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::MinLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Cast &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::CastLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Transpose &node) { const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(ir::operation::Transpose::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto 
input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::TransposeLayer>(); @@ -872,9 +798,9 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)}; const auto keep_dims = node.param().keep_dims; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axes_tensor = _tensor_builder->portableAt(axes_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axes_tensor = _tensor_reg->getPortableTensor(axes_index).get(); if (node.param().reduce_type == ir::operation::Reduce::ReduceType::MEAN) { @@ -895,36 +821,6 @@ void KernelGenerator::visit(const ir::operation::Reduce &node) } } -void KernelGenerator::visit(const ir::operation::ReLU &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(0)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::ReLULayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::ReLU6 &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(0)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::ReLU6Layer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Select &node) { const auto output_index{node.getOutputs().at(0)}; @@ -932,10 +828,10 @@ void KernelGenerator::visit(const ir::operation::Select &node) const auto true_index{node.getInputs().at(ir::operation::Select::Input::INPUT_TRUE)}; const auto false_index{node.getInputs().at(ir::operation::Select::Input::INPUT_FALSE)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto condition_tensor = _tensor_builder->portableAt(condition_index).get(); - auto true_tensor = _tensor_builder->portableAt(true_index).get(); - auto false_tensor = _tensor_builder->portableAt(false_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto condition_tensor = _tensor_reg->getPortableTensor(condition_index).get(); + auto true_tensor = _tensor_reg->getPortableTensor(true_index).get(); + auto false_tensor = _tensor_reg->getPortableTensor(false_index).get(); auto fn = std::make_unique<ops::SelectLayer>(); @@ -951,10 +847,10 @@ void KernelGenerator::visit(const ir::operation::Slice &node) const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)}; const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto begins_tensor = _tensor_builder->portableAt(begins_index).get(); - auto sizes_tensor = _tensor_builder->portableAt(sizes_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto begins_tensor = 
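ReLU and ReLU6 above are deleted as dedicated visitors because ElementwiseActivation covers them: both are clamps, and the alpha/beta arguments passed to ElementwiseActivationLayer::configure can carry the bounds. Assuming that reading of the parameters (this hunk does not spell it out), the per-element math is just:

#include <algorithm>
#include <limits>

// ReLU: clamp(x, 0, +inf);  ReLU6: clamp(x, 0, 6).
float clampActivation(float x, float lower, float upper)
{
  return std::min(upper, std::max(lower, x));
}
// e.g. clampActivation(x, 0.0f, 6.0f) reproduces ReLU6, and
//      clampActivation(x, 0.0f, std::numeric_limits<float>::infinity()) plain ReLU.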
_tensor_reg->getPortableTensor(begins_index).get(); + auto sizes_tensor = _tensor_reg->getPortableTensor(sizes_index).get(); auto fn = std::make_unique<ops::SliceLayer>(); @@ -971,11 +867,11 @@ void KernelGenerator::visit(const ir::operation::StridedSlice &node) const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)}; const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto starts_tensor = _tensor_builder->portableAt(starts_index).get(); - auto ends_tensor = _tensor_builder->portableAt(ends_index).get(); - auto strides_tensor = _tensor_builder->portableAt(strides_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto starts_tensor = _tensor_reg->getPortableTensor(starts_index).get(); + auto ends_tensor = _tensor_reg->getPortableTensor(ends_index).get(); + auto strides_tensor = _tensor_reg->getPortableTensor(strides_index).get(); auto begin_mask = node.param().begin_mask; auto end_mask = node.param().end_mask; @@ -999,11 +895,11 @@ void KernelGenerator::visit(const ir::operation::Split &node) const auto axis = ops::getAxis(rank, node.param().axis, _current_op_seq_layout); auto axis_resolved = axis < 0 ? axis + rank : axis; - auto in_tensor = _tensor_builder->portableAt(input_idx).get(); + auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get(); std::vector<IPortableTensor *> out_tensors; for (auto &output_idx : node.getOutputs()) - out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::SplitLayer>(); @@ -1012,73 +908,13 @@ void KernelGenerator::visit(const ir::operation::Split &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Abs &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::AbsLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Sin &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::SinLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Cos &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Cos::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::CosLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::RSQRT &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto 
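The Split visitor above normalizes a possibly negative axis with axis < 0 ? axis + rank : axis; several other visitors in this file rely on the same convention via ops::getAxis. Isolated:

#include <cassert>

// A negative axis counts from the back: -1 on a rank-4 tensor means axis 3.
int resolveAxis(int axis, int rank)
{
  assert(-rank <= axis && axis < rank);
  return axis < 0 ? axis + rank : axis;
}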
ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::RsqrtLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::Shape &node) { const auto ofm_index{node.getOutputs().at(0)}; const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); auto fn = std::make_unique<ops::ShapeLayer>(); @@ -1097,8 +933,8 @@ void KernelGenerator::visit(const ir::operation::ResizeBilinear &node) auto align_corners = node.param().align_corners; auto half_pixel_centers = node.param().half_pixel_centers; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::ResizeBilinearLayer>(); @@ -1114,9 +950,9 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) const auto input_index{node.getInputs().at(ir::operation::Reverse::INPUT)}; const auto axis_index{node.getInputs().at(ir::operation::Reverse::AXIS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto axis_tensor = _tensor_builder->portableAt(axis_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto axis_tensor = _tensor_reg->getPortableTensor(axis_index).get(); auto fn = std::make_unique<ops::ReverseLayer>(); @@ -1125,21 +961,6 @@ void KernelGenerator::visit(const ir::operation::Reverse &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Neg &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::NegLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::ArgMax &node) { const auto output_index{node.getOutputs().at(0)}; @@ -1147,8 +968,8 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::ArgMinMaxLayer>(); @@ -1157,81 +978,45 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Pow &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)}; - 
const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); - - auto fn = std::make_unique<ops::PowLayer>(); - - fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::Log &node) +void KernelGenerator::visit(const ir::operation::Pool2D &node) { const auto ofm_index{node.getOutputs().at(0)}; - const auto ifm_index{node.getInputs().at(ir::operation::Log::Input::INPUT)}; - - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto ifm_tensor = _tensor_builder->portableAt(ifm_index).get(); - - auto fn = std::make_unique<ops::LogLayer>(); - - fn->configure(ifm_tensor, ofm_tensor); - - _return_fn = std::move(fn); -} + const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)}; -void KernelGenerator::visit(const ir::operation::Round &node) -{ - const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::Round::INPUT)}; + const auto kh = node.param().kh; + const auto kw = node.param().kw; + const auto stride = node.param().stride; + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout); + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout); + const auto padding = + ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh); + const auto activation = node.param().activation; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); - auto fn = std::make_unique<ops::RoundLayer>(); + auto fn = std::make_unique<ops::PoolLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(ifm_tensor, padding.left, padding.right, padding.top, padding.bottom, + stride.horizontal, stride.vertical, kw, kh, activation, ofm_tensor, + convertPoolType(node.param().op_type)); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::LogicalNot &node) +void KernelGenerator::visit(const ir::operation::Pow &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::LogicalNot::INPUT)}; - - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - - auto fn = std::make_unique<ops::LogicalNotLayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - -void KernelGenerator::visit(const ir::operation::LogicalOr &node) -{ - const auto ofm_index{node.getOutputs().at(0)}; - const auto lhs_index{node.getInputs().at(0)}; - const auto rhs_index{node.getInputs().at(1)}; + const auto lhs_index{node.getInputs().at(ir::operation::Pow::LHS)}; + const auto rhs_index{node.getInputs().at(ir::operation::Pow::RHS)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = 
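The new Pool2D visitor above replaces the deleted MaxPool2D and AvgPool2D ones: the padding, stride and window plumbing was already identical, so only the reduction differs and convertPoolType selects it. A 1-D standalone sketch of that shared skeleton (illustrative only, not PoolLayer itself):

#include <algorithm>
#include <vector>

enum class PoolType { kAvg, kMax };

std::vector<float> pool1D(const std::vector<float> &in, int kw, int stride, PoolType type)
{
  std::vector<float> out;
  for (int start = 0; start + kw <= static_cast<int>(in.size()); start += stride)
  {
    float acc = (type == PoolType::kMax) ? in[start] : 0.0f;
    for (int i = 0; i < kw; ++i)
      acc = (type == PoolType::kMax) ? std::max(acc, in[start + i]) : acc + in[start + i];
    out.push_back(type == PoolType::kAvg ? acc / kw : acc);  // avg divides, max keeps
  }
  return out;
}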
_tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); - auto fn = std::make_unique<ops::LogicalOrLayer>(); + auto fn = std::make_unique<ops::PowLayer>(); - fn->configure(lhs_tensor, rhs_tensor, ofm_tensor); + fn->configure(lhs_tensor, rhs_tensor, ir::Activation::NONE, output_tensor); _return_fn = std::move(fn); } @@ -1241,8 +1026,8 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) const auto output_index{node.getOutputs().at(0)}; const auto input_index{node.getInputs().at(0)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto input_alloc = _tensor_builder->portableAt(input_index).get(); + auto output_alloc = _tensor_reg->getPortableTensor(output_index).get(); + auto input_alloc = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::L2NormLayer>(); @@ -1251,35 +1036,36 @@ void KernelGenerator::visit(const ir::operation::L2Normalization &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::ZerosLike &node) +void KernelGenerator::visit(const ir::operation::Range &node) { const auto output_index{node.getOutputs().at(0)}; - const auto input_index{node.getInputs().at(ir::operation::ZerosLike::INPUT)}; + const auto start_index{node.getInputs().at(ir::operation::Range::START)}; + const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)}; + const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto start_tensor = _tensor_reg->getPortableTensor(start_index).get(); + auto limit_tensor = _tensor_reg->getPortableTensor(limit_index).get(); + auto delta_tensor = _tensor_reg->getPortableTensor(delta_index).get(); - auto fn = std::make_unique<ops::ZerosLikeLayer>(); + auto fn = std::make_unique<ops::RangeLayer>(); - fn->configure(input_tensor, output_tensor); + fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor); _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Range &node) +void KernelGenerator::visit(const ir::operation::Rank &node) { - const auto output_index{node.getOutputs().at(0)}; - const auto start_index{node.getInputs().at(ir::operation::Range::START)}; - const auto limit_index{node.getInputs().at(ir::operation::Range::LIMIT)}; - const auto delta_index{node.getInputs().at(ir::operation::Range::DELTA)}; + const auto ofm_index{node.getOutputs().at(0)}; + const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto start_tensor = _tensor_builder->portableAt(start_index).get(); - auto limit_tensor = _tensor_builder->portableAt(limit_index).get(); - auto delta_tensor = _tensor_builder->portableAt(delta_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto ifm_tensor = _tensor_reg->getPortableTensor(ifm_index).get(); - auto fn = std::make_unique<ops::RangeLayer>(); + auto fn = std::make_unique<ops::RankLayer>(); + + fn->configure(ifm_tensor, ofm_tensor); - fn->configure(start_tensor, limit_tensor, delta_tensor, output_tensor); _return_fn = std::move(fn); } @@ -1289,9 +1075,9 @@ void KernelGenerator::visit(const ir::operation::SquaredDifference &node) const auto 
lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)}; - auto ofm_tensor = _tensor_builder->portableAt(ofm_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto ofm_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); auto fn = std::make_unique<ops::SqDiffLayer>(); @@ -1305,9 +1091,9 @@ void KernelGenerator::visit(const ir::operation::Tile &node) const auto input_index{node.getInputs().at(ir::operation::Tile::INPUT)}; const auto multiples_index{node.getInputs().at(ir::operation::Tile::MULTIPLES)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto multiples_tensor = _tensor_builder->portableAt(multiples_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto multiples_tensor = _tensor_reg->getPortableTensor(multiples_index).get(); auto fn = std::make_unique<ops::TileLayer>(); @@ -1322,10 +1108,10 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node) const auto num_lower_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_LOWER_DIAG)}; const auto num_upper_index{node.getInputs().at(ir::operation::MatrixBandPart::NUM_UPPER_DIAG)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto num_lower_tensor = _tensor_builder->portableAt(num_lower_index).get(); - auto num_upper_tensor = _tensor_builder->portableAt(num_upper_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto num_lower_tensor = _tensor_reg->getPortableTensor(num_lower_index).get(); + auto num_upper_tensor = _tensor_reg->getPortableTensor(num_upper_index).get(); auto fn = std::make_unique<ops::MatrixBandPartLayer>(); @@ -1339,9 +1125,9 @@ void KernelGenerator::visit(const ir::operation::BatchMatMul &node) const auto lhs_index{node.getInputs().at(ir::operation::BatchMatMul::LHS)}; const auto rhs_index{node.getInputs().at(ir::operation::BatchMatMul::RHS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto lhs_tensor = _tensor_builder->portableAt(lhs_index).get(); - auto rhs_tensor = _tensor_builder->portableAt(rhs_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto lhs_tensor = _tensor_reg->getPortableTensor(lhs_index).get(); + auto rhs_tensor = _tensor_reg->getPortableTensor(rhs_index).get(); const auto adj_x = node.param().adj_x; const auto adj_y = node.param().adj_y; @@ -1358,9 +1144,9 @@ void KernelGenerator::visit(const ir::operation::BroadcastTo &node) const auto input_index{node.getInputs().at(ir::operation::BroadcastTo::INPUT)}; const auto shape_index{node.getInputs().at(ir::operation::BroadcastTo::SHAPE)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto shape_tensor = _tensor_builder->portableAt(shape_index).get(); + auto output_tensor = 
_tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto shape_tensor = _tensor_reg->getPortableTensor(shape_index).get(); auto fn = std::make_unique<ops::BroadcastToLayer>(); @@ -1373,10 +1159,10 @@ void KernelGenerator::visit(const ir::operation::FusedBatchNorm &node) { const auto ofm_index{node.getOutputs().at(0)}; - auto output_tensor = _tensor_builder->portableAt(ofm_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(ofm_index).get(); std::vector<const IPortableTensor *> input_tensors; for (auto &ifm_idx : node.getInputs()) - input_tensors.emplace_back(_tensor_builder->portableAt(ifm_idx).get()); + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx).get()); const auto epsilon = node.param().epsilon; const auto is_training = node.param().is_training; @@ -1397,8 +1183,8 @@ void KernelGenerator::visit(const ir::operation::LogSoftmax &node) const auto beta = node.param().beta; const auto axis = node.param().axis; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); auto fn = std::make_unique<ops::LogSoftMaxLayer>(); @@ -1414,10 +1200,10 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) const auto block_shape_index{node.getInputs().at(ir::operation::SpaceToBatchND::BLOCK_SIZE)}; const auto padding_index{node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS)}; - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto block_shape_tensor = _tensor_builder->portableAt(block_shape_index).get(); - auto padding_tensor = _tensor_builder->portableAt(padding_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto block_shape_tensor = _tensor_reg->getPortableTensor(block_shape_index).get(); + auto padding_tensor = _tensor_reg->getPortableTensor(padding_index).get(); auto fn = std::make_unique<ops::SpaceToBatchNDLayer>(); @@ -1426,29 +1212,14 @@ void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node) _return_fn = std::move(fn); } -void KernelGenerator::visit(const ir::operation::Quantize &node) -{ - const auto input_index{node.getInputs().at(ir::operation::Quantize::Input::INPUT)}; - const auto output_index{node.getOutputs().at(0)}; - - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto output_tensor = _tensor_builder->portableAt(output_index).get(); - - auto fn = std::make_unique<ops::QuantizeLayer>(); - - fn->configure(input_tensor, output_tensor); - - _return_fn = std::move(fn); -} - void KernelGenerator::visit(const ir::operation::SpaceToDepth &node) { const auto input_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)}; const auto output_index{node.getOutputs().at(0)}; auto block_size = node.param().block_size; - auto input_tensor = _tensor_builder->portableAt(input_index).get(); - auto output_tensor = _tensor_builder->portableAt(output_index).get(); + auto input_tensor = _tensor_reg->getPortableTensor(input_index).get(); + auto output_tensor = _tensor_reg->getPortableTensor(output_index).get(); auto fn = std::make_unique<ops::SpaceToDepthLayer>(); @@ -1462,9 +1233,9 @@ 
void KernelGenerator::visit(const ir::operation::StatelessRandomUniform &node) const auto shape_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SHAPE)}; const auto seed_index{node.getInputs().at(ir::operation::StatelessRandomUniform::SEED)}; - auto output_alloc = _tensor_builder->portableAt(output_index).get(); - auto shape_alloc = _tensor_builder->portableAt(shape_index).get(); - auto seed_alloc = _tensor_builder->portableAt(seed_index).get(); + auto output_alloc = _tensor_reg->getPortableTensor(output_index).get(); + auto shape_alloc = _tensor_reg->getPortableTensor(shape_index).get(); + auto seed_alloc = _tensor_reg->getPortableTensor(seed_index).get(); auto fn = std::make_unique<ops::StatelessRandomUniformLayer>(); @@ -1481,13 +1252,13 @@ void KernelGenerator::visit(const ir::operation::SplitV &node) const auto size_splits{node.getInputs().at(ir::operation::SplitV::Input::SIZE_SPLITS)}; const auto split_dim{node.getInputs().at(ir::operation::SplitV::Input::SPLIT_DIM)}; - auto in_tensor = _tensor_builder->portableAt(input_idx).get(); - auto in_size_splits = _tensor_builder->portableAt(size_splits).get(); - auto in_split_dim = _tensor_builder->portableAt(split_dim).get(); + auto in_tensor = _tensor_reg->getPortableTensor(input_idx).get(); + auto in_size_splits = _tensor_reg->getPortableTensor(size_splits).get(); + auto in_split_dim = _tensor_reg->getPortableTensor(split_dim).get(); std::vector<IPortableTensor *> out_tensors; for (auto &output_idx : node.getOutputs()) - out_tensors.emplace_back(_tensor_builder->portableAt(output_idx).get()); + out_tensors.emplace_back(_tensor_reg->getPortableTensor(output_idx).get()); auto fn = std::make_unique<ops::SplitVLayer>(); diff --git a/runtime/onert/backend/cpu/KernelGenerator.h b/runtime/onert/backend/cpu/KernelGenerator.h index 40c056a96..786e68ee0 100644 --- a/runtime/onert/backend/cpu/KernelGenerator.h +++ b/runtime/onert/backend/cpu/KernelGenerator.h @@ -19,6 +19,7 @@ #include "ExternalContext.h" #include "TensorBuilder.h" +#include "backend/cpu_common/TensorRegistry.h" #include "Tensor.h" #include <backend/CustomKernelBuilder.h> @@ -38,6 +39,7 @@ class KernelGenerator : public IKernelGenerator public: KernelGenerator(const ir::Operands &operands_ctx, const ir::Operations &operations_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg, const std::shared_ptr<custom::IKernelBuilder> &kernel_builder, const std::shared_ptr<ExternalContext> &external_context); @@ -46,8 +48,6 @@ public: void visit(const ir::OpSequence &) override; void visit(const ir::operation::Conv2D &) override; void visit(const ir::operation::DepthwiseConv2D &) override; - void visit(const ir::operation::MaxPool2D &) override; - void visit(const ir::operation::AvgPool2D &) override; void visit(const ir::operation::Concat &) override; void visit(const ir::operation::Fill &) override; void visit(const ir::operation::FullyConnected &) override; @@ -55,51 +55,35 @@ public: void visit(const ir::operation::Squeeze &) override; void visit(const ir::operation::Softmax &) override; void visit(const ir::operation::Comparison &) override; - void visit(const ir::operation::Add &) override; - void visit(const ir::operation::Sub &) override; - void visit(const ir::operation::Mul &) override; - void visit(const ir::operation::Div &) override; + void visit(const ir::operation::BinaryArithmetic &) override; void visit(const ir::operation::Einsum &) override; void visit(const ir::operation::Gather &) 
override; void visit(const ir::operation::Custom &node) override; - void visit(const ir::operation::Exp &) override; + void visit(const ir::operation::ElementwiseActivation &) override; + void visit(const ir::operation::ElementwiseBinary &) override; + void visit(const ir::operation::ElementwiseUnary &) override; void visit(const ir::operation::ExpandDims &) override; - void visit(const ir::operation::Logistic &) override; void visit(const ir::operation::Pad &) override; - void visit(const ir::operation::Max &) override; - void visit(const ir::operation::Min &) override; - void visit(const ir::operation::Tanh &) override; void visit(const ir::operation::Pack &) override; void visit(const ir::operation::Unpack &) override; void visit(const ir::operation::OneHot &) override; - void visit(const ir::operation::Cast &) override; void visit(const ir::operation::Transpose &) override; void visit(const ir::operation::Reduce &) override; - void visit(const ir::operation::ReLU &) override; - void visit(const ir::operation::ReLU6 &) override; void visit(const ir::operation::Select &) override; void visit(const ir::operation::Slice &) override; void visit(const ir::operation::StridedSlice &) override; void visit(const ir::operation::Split &) override; - void visit(const ir::operation::Abs &) override; - void visit(const ir::operation::Cos &) override; - void visit(const ir::operation::Sin &) override; - void visit(const ir::operation::RSQRT &) override; void visit(const ir::operation::Shape &) override; void visit(const ir::operation::ResizeBilinear &node) override; void visit(const ir::operation::Reverse &) override; - void visit(const ir::operation::Neg &) override; void visit(const ir::operation::ArgMax &) override; - void visit(const ir::operation::Log &) override; - void visit(const ir::operation::Round &) override; + void visit(const ir::operation::Pool2D &) override; void visit(const ir::operation::Pow &) override; - void visit(const ir::operation::LogicalNot &) override; - void visit(const ir::operation::ZerosLike &) override; void visit(const ir::operation::SquaredDifference &) override; void visit(const ir::operation::Tile &) override; - void visit(const ir::operation::LogicalOr &) override; void visit(const ir::operation::L2Normalization &) override; void visit(const ir::operation::Range &) override; + void visit(const ir::operation::Rank &) override; void visit(const ir::operation::MatrixBandPart &) override; void visit(const ir::operation::BatchMatMul &) override; void visit(const ir::operation::BatchToSpaceND &) override; @@ -107,7 +91,6 @@ public: void visit(const ir::operation::FusedBatchNorm &) override; void visit(const ir::operation::LogSoftmax &) override; void visit(const ir::operation::SpaceToBatchND &) override; - void visit(const ir::operation::Quantize &) override; void visit(const ir::operation::SpaceToDepth &) override; void visit(const ir::operation::StatelessRandomUniform &) override; void visit(const ir::operation::SplitV &) override; @@ -116,6 +99,7 @@ private: const ir::Operands &_ctx; const ir::Operations &_operations_ctx; std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; std::shared_ptr<backend::custom::IKernelBuilder> _kernel_builder; ir::Layout _current_op_seq_layout; const std::shared_ptr<ExternalContext> _external_context; diff --git a/runtime/onert/backend/cpu/TensorBuilder.cc b/runtime/onert/backend/cpu/TensorBuilder.cc index ab8ba5756..828d52f7c 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.cc +++ 
b/runtime/onert/backend/cpu/TensorBuilder.cc @@ -27,8 +27,8 @@ namespace backend namespace cpu { -TensorBuilder::TensorBuilder() - : _tensor_reg{new cpu_common::TensorRegistry()}, +TensorBuilder::TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new cpu_common::DynamicTensorManager(_tensor_reg)}, _static_tensor_mgr{new StaticTensorManager(_tensor_reg, _dynamic_tensor_mgr.get())} { @@ -57,7 +57,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); const auto tensor_info = _tensor_info_map.at(ind); - if (!at(ind)->is_dynamic()) + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) { const auto size = tensor_info.total_size(); _static_tensor_mgr->claimPlan(ind, size); @@ -66,7 +66,7 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { - if (!at(ind)->is_dynamic()) + if (!_tensor_reg->getNativeTensor(ind)->is_dynamic()) { _static_tensor_mgr->releasePlan(ind); } @@ -85,29 +85,6 @@ void TensorBuilder::allocate() // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. } -std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind) -{ - return _tensor_reg->getITensor(ind); -} - -std::shared_ptr<IPortableTensor> TensorBuilder::portableAt(const ir::OperandIndex &ind) -{ - return _tensor_reg->getPortableTensor(ind); -} - -bool TensorBuilder::setMigrantTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) -{ - return _tensor_reg->setMigrantTensor(ind, tensor); -} - -void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); } - -std::shared_ptr<Tensor> TensorBuilder::at(const ir::OperandIndex &ind) -{ - return _tensor_reg->getNativeTensor(ind); -} - std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void) { return std::move(_static_tensor_mgr); diff --git a/runtime/onert/backend/cpu/TensorBuilder.h b/runtime/onert/backend/cpu/TensorBuilder.h index 617136514..b6d5f09cc 100644 --- a/runtime/onert/backend/cpu/TensorBuilder.h +++ b/runtime/onert/backend/cpu/TensorBuilder.h @@ -38,9 +38,7 @@ namespace cpu class TensorBuilder : public ITensorBuilder { public: - TensorBuilder(); - - bool supportDynamicTensor() override { return true; } + TensorBuilder(const std::shared_ptr<cpu_common::TensorRegistry> &tensor_reg); /** * @brief Register tensor information to allocate on CPU backend @@ -60,34 +58,12 @@ public: void allocate() override; void postFunctionPrepare() override { /* DO NOTHING */} - /** - * @brief Get tensor with a specific OperandIndex - * - * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise. - */ - std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override; - - void iterate(const IterateFunction &fn) override; - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) override; - /** - * @brief Get tensor with a specific OperandIndex. - * @param ind OperandIndex for the tensor. There must exist a tensor with this ind. - * If not, program will crash with assert or exception. 
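TensorBuilder above stops constructing its own TensorRegistry and receives one through the constructor; the same shared_ptr is handed to KernelGenerator, so builder, generator and the dynamic-tensor machinery all observe one registry. The injection shape in miniature, with hypothetical minimal types:

#include <memory>
#include <utility>

struct TensorRegistry { /* shared index -> tensor maps */ };

class Builder
{
public:
  explicit Builder(std::shared_ptr<TensorRegistry> reg) : _reg(std::move(reg)) {}

private:
  std::shared_ptr<TensorRegistry> _reg;
};

// usage: auto reg = std::make_shared<TensorRegistry>();
//        Builder builder{reg};  // the kernel generator receives the same reg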
- * @return shared_ptr<Tensor> - */ - std::shared_ptr<Tensor> at(const ir::OperandIndex &ind); - std::shared_ptr<IPortableTensor> portableAt(const ir::OperandIndex &ind); - bool setMigrantTensor(const ir::OperandIndex &ind, - const std::shared_ptr<IPortableTensor> &tensor) override; - - std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; } - private: const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; std::unique_ptr<cpu_common::DynamicTensorManager> _dynamic_tensor_mgr; diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.cc b/runtime/onert/backend/cpu/ops/AbsLayer.cc deleted file mode 100644 index 322785aeb..000000000 --- a/runtime/onert/backend/cpu/ops/AbsLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "AbsLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -AbsLayer::AbsLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void AbsLayer::absFloat32() -{ - nnfw::cker::Abs(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void AbsLayer::absQuant8() { throw std::runtime_error{"NYI"}; } - -void AbsLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void AbsLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - absFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - absQuant8(); - } - else - { - throw std::runtime_error{"Abs: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AbsLayer.h b/runtime/onert/backend/cpu/ops/AbsLayer.h deleted file mode 100644 index feb5f35ae..000000000 --- a/runtime/onert/backend/cpu/ops/AbsLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
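The cpu TensorBuilder now receives its TensorRegistry from outside instead of creating one, so the KernelGenerator (which gains a _tensor_reg member in the hunk above) and the tensor managers all observe the same registry, and the removed lookup helpers (at/portableAt/tensorAt/setMigrantTensor) become direct registry calls. A minimal sketch of the resulting wiring, assuming backend setup code shaped roughly like this (the Backend/BackendContext side is not part of this excerpt):

  auto tensor_reg = std::make_shared<cpu_common::TensorRegistry>();
  auto tensor_builder = std::make_shared<TensorBuilder>(tensor_reg); // new constructor shown above
  // kernel generation and shape handling query the shared registry directly:
  auto tensor = tensor_reg->getNativeTensor(ind); // replaces tensor_builder->at(ind)
  if (!tensor->is_dynamic())
  {
    // static planning path, as in notifyFirstUse/notifyLastUse above
  }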
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ - -#include "backend/IPortableTensor.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AbsLayer : public ::onert::exec::IFunction -{ -public: - AbsLayer(); - -public: - void absFloat32(); - - void absQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ABSLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/AddLayer.cc b/runtime/onert/backend/cpu/ops/AddLayer.cc deleted file mode 100644 index 379215303..000000000 --- a/runtime/onert/backend/cpu/ops/AddLayer.cc +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "AddLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void AddLayer::addFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void AddLayer::addInt32() -{ - int32_t output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), 
- getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); -} - -void AddLayer::addQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - // Parameters for scaled quantized computation - op_params.left_shift = 20; - // Zero-points of input and output tensors - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255)); - assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255)); - assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255)); - - // Compute normalized scale for _lhs and _rhs values, - // and represent in 32-bit fixed point - const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale()); - const double real_lhs_scale = _lhs->data_scale() / norm_max_scale; - const double real_rhs_scale = _rhs->data_scale() / norm_max_scale; - // output scale is used to normalize final result, so we invert the scale here - const double real_output_scale = - norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift)); - - // Represent the scales as fixed int32_t multipliers, and int32_t shifts - QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift); - QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift); - QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift); - - // cker quant8 add is not implemented yet - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::ADD>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void AddLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void AddLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - addFloat32(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { 
- addQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - addInt32(); - } - else - { - throw std::runtime_error{"Add: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AddLayer.h b/runtime/onert/backend/cpu/ops/AddLayer.h deleted file mode 100644 index 91030d93a..000000000 --- a/runtime/onert/backend/cpu/ops/AddLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AddLayer : public ::onert::exec::IFunction -{ -public: - AddLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void addFloat32(); - - void addQuant8(); - - void addInt32(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ADDLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc b/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc deleted file mode 100644 index 9c22c1c86..000000000 --- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.cc +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
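For reference, the uint8 path above (addQuant8, carried over into setAddOrSubQuant8Params of the new BinaryArithmeticLayer below) adds asymmetric-quantized tensors by first rescaling both inputs into a common fixed-point domain: values are pre-shifted left by 20 bits, each input scale is normalized against twice the larger input scale (so both normalized scales are at most 0.5), and the inverse of the shift is folded into the output multiplier. A self-contained sketch of just the scale arithmetic (plain C++; QuantizeMultiplier then splits each double into an int32 multiplier plus shift):

  const int left_shift = 20;
  const double norm_max_scale = 2 * std::max(lhs_scale, rhs_scale);
  const double real_lhs_scale = lhs_scale / norm_max_scale;   // <= 0.5
  const double real_rhs_scale = rhs_scale / norm_max_scale;   // <= 0.5
  // the output scale normalizes the final sum, so it is inverted here
  const double real_output_scale = norm_max_scale / (output_scale * (1 << left_shift));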
- */ - -#include "AvgPoolLayer.h" - -#include <cker/operation/AveragePool.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -#define AVGPOOLING_PARAMETERS \ - nnfw::cker::PoolParams op_params; \ - op_params.stride_height = _strideHeight; \ - op_params.stride_width = _strideWidth; \ - op_params.filter_height = _kernelHeight; \ - op_params.filter_width = _kernelWidth; \ - op_params.padding_values.height = (int8_t)_paddingTop; \ - op_params.padding_values.width = (int8_t)_paddingLeft; - -AvgPoolLayer::AvgPoolLayer() - : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0), - _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0), - _activation(ir::Activation::NONE) -{ - // DO NOTHING -} - -void AvgPoolLayer::averagePoolFloat32() -{ - AVGPOOLING_PARAMETERS - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - nnfw::cker::AveragePool(op_params, getTensorShape(_input), - reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} -void AvgPoolLayer::averagePoolQuant8() -{ - AVGPOOLING_PARAMETERS - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - nnfw::cker::AveragePool(op_params, getTensorShape(_input), - reinterpret_cast<const uint8_t *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void AvgPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output) -{ - assert(input != nullptr); - assert(output != nullptr); - - _input = input; - _paddingLeft = paddingLeft; - _paddingRight = paddingRight; - _paddingTop = paddingTop; - _paddingBottom = paddingBottom; - _strideWidth = strideWidth; - _strideHeight = strideHeight; - _kernelWidth = kernelWidth; - _kernelHeight = kernelHeight; - _activation = activation; - _output = output; -} - -void AvgPoolLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - averagePoolFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - averagePoolQuant8(); - } - else - { - throw std::runtime_error{"AvgPool: unsupported data type"}; - } -} - -#undef AVGPOOLING_PARAMETERS - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h b/runtime/onert/backend/cpu/ops/AvgPoolLayer.h deleted file mode 100644 index d4e8f79e7..000000000 --- a/runtime/onert/backend/cpu/ops/AvgPoolLayer.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class AvgPoolLayer : public ::onert::exec::IFunction -{ -public: - AvgPoolLayer(); - -public: - void averagePoolFloat32(); - - void averagePoolQuant8(); - - void configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; - - uint32_t _paddingLeft; - uint32_t _paddingTop; - uint32_t _paddingRight; - uint32_t _paddingBottom; - - uint32_t _strideWidth; - uint32_t _strideHeight; - uint32_t _kernelWidth; - uint32_t _kernelHeight; - - ir::Activation _activation; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_AVGPOOLLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc new file mode 100644 index 000000000..f50c63375 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.cc @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "BinaryArithmeticLayer.h" + +#include <cker/operation/BinaryArithmeticOps.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ + +template <nnfw::cker::BinaryArithmeticOpType arithmetic_type, typename T> +void eval(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output, + nnfw::cker::BinaryArithmeticOpParam op_params) +{ + const bool need_broadcast = + nnfw::cker::ProcessBroadcastShapes(getTensorShape(lhs), getTensorShape(rhs), &op_params); + if (need_broadcast) + { + nnfw::cker::BroadcastBinaryArithmeticOp<arithmetic_type>( + op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); + return; + } + + nnfw::cker::BinaryArithmeticOp<arithmetic_type>( + op_params, getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); +} + +template <nnfw::cker::BinaryArithmeticOpType arithmetic_type> +std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> +generateKernelGeneric(const IPortableTensor *lhs, const ir::Activation activation, + nnfw::cker::BinaryArithmeticOpParam op_params) +{ + switch (lhs->data_type()) + { + case OperandType::FLOAT32: + { + float output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(activation, &output_activation_min, &output_activation_max); + op_params.float_activation_max = output_activation_max; + op_params.float_activation_min = output_activation_min; + return std::bind(&eval<arithmetic_type, float>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, op_params); + break; + } + case OperandType::INT32: + { + int32_t output_activation_min = 0, output_activation_max = 0; + CalculateActivationRange(activation, &output_activation_min, &output_activation_max); + op_params.quantized_activation_max = output_activation_max; + op_params.quantized_activation_min = output_activation_min; + return std::bind(eval<arithmetic_type, int32_t>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, op_params); + break; + } + default: + throw std::runtime_error{"BinaryArithmetic(generic): Unsupported data type"}; + } +} + +void setAddOrSubQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output, ir::Activation activation, + nnfw::cker::BinaryArithmeticOpParam *params) +{ + int32_t output_activation_min, output_activation_max; + CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max); + nnfw::cker::BinaryArithmeticOpParam &op_params = *params; + op_params.quantized_activation_max = output_activation_max; + op_params.quantized_activation_min = output_activation_min; + // Parameters for scaled quantized computation + op_params.left_shift = 20; + // Zero-points of input and output tensors + op_params.input1_offset = -lhs->data_offset(); + op_params.input2_offset = -rhs->data_offset(); + op_params.output_offset = output->data_offset(); + assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255)); + assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255)); + assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255)); + + // Compute normalized scale for _lhs and _rhs values, + // and represent in 
32-bit fixed point + const double norm_max_scale = 2 * std::max(lhs->data_scale(), rhs->data_scale()); + const double real_lhs_scale = lhs->data_scale() / norm_max_scale; + const double real_rhs_scale = rhs->data_scale() / norm_max_scale; + // output scale is used to normalize final result, so we invert the scale here + const double real_output_scale = + norm_max_scale / (output->data_scale() * (1 << op_params.left_shift)); + + // Represent the scales as fixed int32_t multipliers, and int32_t shifts + QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift); + QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift); + QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift); +} + +void setMulQuant8Params(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output, ir::Activation activation, + nnfw::cker::BinaryArithmeticOpParam *params) +{ + int32_t output_activation_min, output_activation_max; + CalculateActivationRangeUint8(activation, output, &output_activation_min, &output_activation_max); + nnfw::cker::BinaryArithmeticOpParam &op_params = *params; + + op_params.quantized_activation_max = output_activation_max; + op_params.quantized_activation_min = output_activation_min; + op_params.input1_offset = -lhs->data_offset(); + op_params.input2_offset = -rhs->data_offset(); + op_params.output_offset = output->data_offset(); + + double real_multiplier = lhs->data_scale() * rhs->data_scale() / output->data_scale(); + QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift); +} + +} // namespace + +void BinaryArithmeticLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output, const ir::Activation activation, + const ArithmeticType arithmetic_type) +{ + assert(lhs != nullptr); + assert(rhs != nullptr); + assert(output != nullptr); + + _lhs = lhs; + _rhs = rhs; + _output = output; + + nnfw::cker::BinaryArithmeticOpParam op_params; + switch (arithmetic_type) + { + case ArithmeticType::kAdd: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params); + _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::ADD, uint8_t>, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + op_params); + } + else + { + _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::ADD>(_lhs, activation, + op_params); + } + break; + case ArithmeticType::kSub: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + setAddOrSubQuant8Params(_lhs, _rhs, _output, activation, &op_params); + op_params.input2_multiplier *= -1; + _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::SUB, uint8_t>, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + op_params); + } + else + { + _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::SUB>(_lhs, activation, + op_params); + } + break; + case ArithmeticType::kMul: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + nnfw::cker::BinaryArithmeticOpParam op_params; + setMulQuant8Params(_lhs, _rhs, _output, activation, &op_params); + _kernel = std::bind(&eval<nnfw::cker::BinaryArithmeticOpType::MUL, uint8_t>, + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + op_params); + } + else + { + _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::MUL>(_lhs, activation, + op_params); + } + 
break; + case ArithmeticType::kDiv: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + throw std::runtime_error{ + "BinaryArithmetic(Div): Div operation does not support quantization"}; + } + else if (_lhs->data_type() == OperandType::INT32) + { + throw std::runtime_error{"BinaryArithmetic(Div): Unsupported data type"}; + } + else + { + _kernel = generateKernelGeneric<nnfw::cker::BinaryArithmeticOpType::DIV>(_lhs, activation, + op_params); + } + break; + default: + throw std::runtime_error{"BinaryArithmetic: Unsupported BinaryArithmetic type"}; + } +} + +void BinaryArithmeticLayer::run() { _kernel(_lhs, _rhs, _output); } + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/DivLayer.h b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h index 9411be76e..d6b33ad07 100644 --- a/runtime/onert/backend/cpu/ops/DivLayer.h +++ b/runtime/onert/backend/cpu/ops/BinaryArithmeticLayer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__ #include <backend/IPortableTensor.h> #include "OperationUtils.h" @@ -31,21 +31,25 @@ namespace cpu namespace ops { -class DivLayer : public ::onert::exec::IFunction +enum class ArithmeticType +{ + kAdd, + kSub, + kMul, + kDiv, +}; + +class BinaryArithmeticLayer : public ::onert::exec::IFunction { public: - DivLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) + BinaryArithmeticLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) { // DO NOTHING } public: - void divFloat32(); - - void divQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); + void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output, + const ir::Activation activation, const ArithmeticType arithmetic_type); void run() override; @@ -54,7 +58,7 @@ private: const IPortableTensor *_rhs; IPortableTensor *_output; - ir::Activation _activation{ir::Activation::NONE}; + std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -62,4 +66,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_DIVLAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_BINARYARITHMETICLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/CastLayer.cc b/runtime/onert/backend/cpu/ops/CastLayer.cc deleted file mode 100644 index 497515606..000000000 --- a/runtime/onert/backend/cpu/ops/CastLayer.cc +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
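The net effect of the BinaryArithmeticLayer above: the four former layers (Add, Sub, Mul, Div) share one class, and all type- and quantization-dependent decisions are made once in configure(), which binds a std::function kernel; run() reduces to a single indirect call, while broadcasting is still resolved per run inside eval() via ProcessBroadcastShapes. A usage sketch based on the interfaces in this hunk (tensor setup omitted):

  BinaryArithmeticLayer layer;
  // was: AddLayer::configure(lhs, rhs, activation, output)
  layer.configure(lhs, rhs, output, ir::Activation::NONE, ArithmeticType::kAdd);
  layer.run(); // calls the bound eval<nnfw::cker::BinaryArithmeticOpType::ADD, float>(...)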
- */ - -#include "CastLayer.h" - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -CastLayer::CastLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void CastLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -template <typename FromT, typename ToT> void CastLayer::castTensor(const FromT *in, ToT *out) -{ - auto input_shape = getTensorShape(_input); - auto output_shape = getTensorShape(_output); - const auto num_elements = MatchingFlatSize(input_shape, output_shape); - - std::transform(in, in + num_elements, out, [](FromT a) { return static_cast<ToT>(a); }); -} - -template <typename FromT> void CastLayer::castPtr(const FromT *in, DataPtr out) -{ - switch (_output->data_type()) - { - case ir::DataType::FLOAT32: - castTensor(in, out.f); - return; - case ir::DataType::INT32: - castTensor(in, out.i32); - return; - case ir::DataType::UINT32: - castTensor(in, out.u32); - return; - case ir::DataType::UINT8: - castTensor(in, out.u8); - return; - case ir::DataType::BOOL8: - castTensor(in, out.b); - return; - case ir::DataType::INT64: - castTensor(in, out.i64); - return; - default: - throw std::runtime_error("Not supported output type" + - std::to_string((int)_output->data_type())); - } -} - -void CastLayer::run() -{ - auto input_buf = _input->buffer(); - auto output_buf = _output->buffer(); - const auto in = *reinterpret_cast<const DataPtr *>(&input_buf); - auto out = *reinterpret_cast<DataPtr *>(&output_buf); - - switch (_input->data_type()) - { - case ir::DataType::FLOAT32: - castPtr(in.f, out); - return; - case ir::DataType::INT32: - castPtr(in.i32, out); - return; - case ir::DataType::UINT32: - castPtr(in.u32, out); - return; - case ir::DataType::UINT8: - castPtr(in.u8, out); - return; - case ir::DataType::BOOL8: - castPtr(in.b, out); - return; - case ir::DataType::INT64: - castPtr(in.i64, out); - return; - default: - throw std::runtime_error("Cast: unsupported data type" + - std::to_string((int)_input->data_type())); - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/CastLayer.h b/runtime/onert/backend/cpu/ops/CastLayer.h deleted file mode 100644 index 290c722e2..000000000 --- a/runtime/onert/backend/cpu/ops/CastLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class CastLayer : public ::onert::exec::IFunction -{ -public: - CastLayer(); - -public: - template <typename FromT, typename ToT> void castTensor(const FromT *in, ToT *out); - template <typename FromT> void castPtr(const FromT *in, DataPtr out); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_CASTLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc index 2d5bbef1e..c057267d3 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.cc @@ -31,7 +31,8 @@ namespace ops ConvolutionLayer::ConvolutionLayer() : _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr), _paddingType(ir::PaddingType::EXPLICIT), _paddingLeft(0), _paddingTop(0), _paddingRight(0), - _paddingBottom(0), _strideWidth(0), _strideHeight(0), _activation(ir::Activation::NONE), + _paddingBottom(0), _strideWidth(0), _strideHeight(0), _dilationWidthFactor(1), + _dilationHeightFactor(1), _activation(ir::Activation::NONE), _conv_kernel(new nnfw::cker::Conv()), _prepare(false) { // DO NOTHING @@ -50,8 +51,8 @@ void ConvolutionLayer::convFloat32() op_params.padding_values.height = _paddingTop; op_params.stride_width = _strideWidth; op_params.stride_height = _strideHeight; - op_params.dilation_width_factor = 1; - op_params.dilation_height_factor = 1; + op_params.dilation_width_factor = _dilationWidthFactor; + op_params.dilation_height_factor = _dilationHeightFactor; op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; @@ -78,8 +79,8 @@ void ConvolutionLayer::convQuant8() nnfw::cker::ConvParams op_params; op_params.stride_width = _strideWidth; op_params.stride_height = _strideHeight; - op_params.dilation_width_factor = 1; - op_params.dilation_height_factor = 1; + op_params.dilation_width_factor = _dilationWidthFactor; + op_params.dilation_height_factor = _dilationHeightFactor; op_params.padding_type = getPaddingType(_paddingType); op_params.padding_values.width = _paddingLeft; op_params.padding_values.height = _paddingTop; @@ -104,6 +105,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, + const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, const ir::Activation activation, IPortableTensor *output) { _input = input; @@ -116,6 +119,8 @@ void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTe _paddingBottom = paddingBottom; _strideWidth = strideWidth; _strideHeight = strideHeight; + _dilationWidthFactor = dilationWidthFactor; + _dilationHeightFactor = dilationHeightFactor; _activation = activation; _output = output; } @@ -145,7 +150,8 @@ void ConvolutionLayer::run() param_padding.param.bottom = _paddingBottom; const auto padding = - 
ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height); + ir::calculatePadding(param_padding, ifm_shape, ofm_shape, stride, ker_width, ker_height, + _dilationWidthFactor, _dilationHeightFactor); _paddingLeft = padding.left; _paddingRight = padding.right; @@ -176,7 +182,8 @@ void ConvolutionLayer::prepare() { bool is_transposed = false; kernel.prepare(getTensorShape(_kernel), reinterpret_cast<const float *>(_kernel->buffer()), - getPaddingType(_paddingType), is_transposed); + getPaddingType(_paddingType), is_transposed, _dilationWidthFactor, + _dilationHeightFactor); // Decrease reference of _kernel(weights) only when _kernel is constant if (is_transposed) diff --git a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h index 2833387c4..398892e65 100644 --- a/runtime/onert/backend/cpu/ops/ConvolutionLayer.h +++ b/runtime/onert/backend/cpu/ops/ConvolutionLayer.h @@ -56,7 +56,8 @@ public: const IPortableTensor *bias, ir::PaddingType _paddingType, const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const ir::Activation activation, + const uint32_t strideHeight, const uint32_t dilationWidthFactor, + const uint32_t dilationHeightFactor, const ir::Activation activation, IPortableTensor *output); void run() override; @@ -77,6 +78,8 @@ private: uint32_t _strideWidth; uint32_t _strideHeight; + uint32_t _dilationWidthFactor; + uint32_t _dilationHeightFactor; ir::Activation _activation; diff --git a/runtime/onert/backend/cpu/ops/CosLayer.cc b/runtime/onert/backend/cpu/ops/CosLayer.cc deleted file mode 100644 index 9417019d5..000000000 --- a/runtime/onert/backend/cpu/ops/CosLayer.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
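The ConvolutionLayer change above plumbs dilation factors (previously hard-coded to 1) into both padding calculation and kernel preparation. Under dilation d, a filter of k taps spans (k - 1) * d + 1 input pixels, and that effective extent is what SAME padding must cover. A small sketch of the arithmetic (the exact ir::calculatePadding body is not in this excerpt; this is the conventional formula):

  // effective filter extent under dilation
  inline uint32_t effective_filter_size(uint32_t k, uint32_t d) { return (k - 1) * d + 1; }

  // SAME padding along one axis (input size in, stride s):
  //   out = ceil(in / s)
  //   pad = max(0, (out - 1) * s + effective_filter_size(k, d) - in)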
- */ - -#include "CosLayer.h" -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -CosLayer::CosLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void CosLayer::cosFloat32() -{ - nnfw::cker::Cos(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void CosLayer::cosQuant8() { throw std::runtime_error{"NYI"}; } - -void CosLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void CosLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - cosFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - cosQuant8(); - } - else - { - throw std::runtime_error{"Cos: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/CosLayer.h b/runtime/onert/backend/cpu/ops/CosLayer.h deleted file mode 100644 index 1fadef718..000000000 --- a/runtime/onert/backend/cpu/ops/CosLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class CosLayer : public ::onert::exec::IFunction -{ -public: - CosLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void cosFloat32(); - void cosQuant8(); - - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_COSLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/DivLayer.cc b/runtime/onert/backend/cpu/ops/DivLayer.cc deleted file mode 100644 index 556c55e33..000000000 --- a/runtime/onert/backend/cpu/ops/DivLayer.cc +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "DivLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void DivLayer::divFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool requires_broadcast = !HaveSameShapes(_lhs, _rhs); - if (requires_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - } - else - { - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::DIV>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - } -} - -void DivLayer::divQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - // op_params.quantized_activation_max = output_activation_max; - // op_params.quantized_activation_min = output_activation_min; - - // cker quant8 div is not implemented yet - throw std::runtime_error{"Div NYI for quantized"}; -} - -void DivLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void DivLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - divFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - divQuant8(); - } - else - { - throw std::runtime_error{"Div: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc new file mode 100644 index 000000000..c1d63172b --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ElementwiseActivationLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Logistic.h> +#include <cker/operation/ReLU.h> +#include <cker/operation/ReLU6.h> +#include <cker/operation/Tanh.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +ElementwiseActivationLayer::ElementwiseActivationLayer() + : _input(nullptr), _output(nullptr), _kernel() +{ + // DO NOTHING +} + +void ElementwiseActivationLayer::PopulateLookupTable(const ElementwiseActivationType op_type) +{ + const auto input_scale = static_cast<double>(_input->data_scale()); + const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); + const auto output_scale = static_cast<double>(_output->data_scale()); + const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); + const float inverse_scale = 1 / output_scale; + int32_t maxval = std::numeric_limits<uint8_t>::max(); + int32_t minval = std::numeric_limits<uint8_t>::min(); + for (int32_t val = minval; val <= maxval; ++val) + { + const float dequantized = input_scale * (val - input_zero_point); + float transformed = 0.f; + if (op_type == ElementwiseActivationType::kTanh) + { + transformed = std::tanh(dequantized); + } + else if (op_type == ElementwiseActivationType::kLogistic) + { + transformed = 1.0f / (1.0f + std::exp(-dequantized)); + } + else + { + throw std::runtime_error("ElementwiseActivationLayer : unsupported activation type"); + } + const float rescaled = std::round(transformed * inverse_scale); + const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); + _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); + } +} + +void ElementwiseActivationLayer::EvalUsingLookupTable(const IPortableTensor *input, + IPortableTensor *output) +{ + const int size = MatchingFlatSize(getTensorShape(input), getTensorShape(output)); + const uint8_t *input_data = reinterpret_cast<const uint8_t *>(input->buffer()); + uint8_t *output_data = reinterpret_cast<uint8_t *>(output->buffer()); + + for (int i = 0; i < size; ++i) + { + output_data[i] = _table[input_data[i]]; + } +} + +void ElementwiseActivationLayer::configure(const IPortableTensor *input, IPortableTensor *output, + float alpha, float beta, + ElementwiseActivationType op_type) +{ + _input = input; + _output = output; + + switch (op_type) + { + case ElementwiseActivationType::kLogistic: + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + PopulateLookupTable(op_type); + _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this, + std::placeholders::_1, std::placeholders::_2); + } + else if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::Logistic(getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Logistic): unsupported data type"}; + } + break; + case ElementwiseActivationType::kReLU: + if (_input->data_type() == OperandType::FLOAT32) + { + if (alpha == std::numeric_limits<float>::infinity() && beta == 0.f) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ReLU(getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else if (alpha == 6.f && beta == 0.f) + { + _kernel = 
[](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::ReLU6(getTensorShape(input), + reinterpret_cast<const float *>(input->buffer()), + reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error( + "ElementwiseActivationLayer : This layer supports only ReLU(0-inf) and ReLU6(0-6)"); + } + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(ReLU): unsupported data type"}; + } + break; + case ElementwiseActivationType::kTanh: + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + PopulateLookupTable(op_type); + _kernel = std::bind(&ElementwiseActivationLayer::EvalUsingLookupTable, this, + std::placeholders::_1, std::placeholders::_2); + } + else if (_input->data_type() == OperandType::FLOAT32) + { + _kernel = [](const IPortableTensor *input, IPortableTensor *output) { + nnfw::cker::Tanh(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); + }; + } + else + { + throw std::runtime_error{"ElementwiseActivationLayer(Tanh): unsupported data type"}; + } + break; + default: + throw std::runtime_error("ElementwiseActivationLayer: unsupported op type"); + } +} + +void ElementwiseActivationLayer::run() { _kernel(_input, _output); } + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h index 35a184074..3ef580041 100644 --- a/runtime/onert/backend/cpu/ops/TanhLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseActivationLayer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__ #include <backend/IPortableTensor.h> @@ -30,26 +30,33 @@ namespace cpu namespace ops { -class TanhLayer : public ::onert::exec::IFunction +enum class ElementwiseActivationType { -public: - TanhLayer(); + kLogistic, + kReLU, + kTanh +}; +class ElementwiseActivationLayer : public ::onert::exec::IFunction +{ public: - void tanhFloat32(); + ElementwiseActivationLayer(); - void tanhQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); +public: + void configure(const IPortableTensor *input, IPortableTensor *output, float alpha, float beta, + const ElementwiseActivationType op_type); void run() override; - void PopulateLookupTable(); + void PopulateLookupTable(const ElementwiseActivationType op_type); + + void EvalUsingLookupTable(const IPortableTensor *input, IPortableTensor *output); private: const IPortableTensor *_input; IPortableTensor *_output; uint8_t _table[256]; + std::function<void(const IPortableTensor *input, IPortableTensor *output)> _kernel; }; } // namespace ops @@ -57,4 +64,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_TANHLAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEACTIVATIONLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc new file mode 100644 index 000000000..ea3c1e7cd --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.cc @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ElementwiseBinaryLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/LogicalOr.h> +#include <cker/operation/MaxMin.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ +template <typename T> +void logicalOrGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output) +{ + if (!HaveSameShapes(lhs, rhs)) + { + nnfw::cker::LogicalOrBroadcast<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), getTensorShape(rhs), + reinterpret_cast<const T *>(rhs->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); + } + else + { + nnfw::cker::LogicalOrElementwise<T>( + getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + reinterpret_cast<const T *>(rhs->buffer()), reinterpret_cast<T *>(output->buffer())); + } +} + +template <typename T> +void maximumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output) +{ + nnfw::cker::Max<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), + getTensorShape(output), reinterpret_cast<T *>(output->buffer())); +} + +template <typename T> +void minimumGeneric(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output) +{ + nnfw::cker::Min<T>(getTensorShape(lhs), reinterpret_cast<const T *>(lhs->buffer()), + getTensorShape(rhs), reinterpret_cast<const T *>(rhs->buffer()), + getTensorShape(output), reinterpret_cast<T *>(output->buffer())); +} + +bool haveSameQuantInfo(const IPortableTensor *lhs, const IPortableTensor *rhs, + const IPortableTensor *output) +{ + return (lhs->data_scale() == rhs->data_scale() && lhs->data_scale() == output->data_scale()) && + (lhs->data_offset() == rhs->data_offset() && lhs->data_offset() == output->data_offset()); +} +} // namespace + +void ElementwiseBinaryLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, + IPortableTensor *output, const ElementwiseBinaryType op_type) +{ + assert(lhs != nullptr); + assert(rhs != nullptr); + assert(output != nullptr); + + _lhs = lhs; + _rhs = rhs; + _output = output; + + switch (op_type) + { + case ElementwiseBinaryType::kLogicalOr: + if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) + { + _kernel = logicalOrGeneric<bool>; + } + else + { + throw std::runtime_error{"LogicalOr: Unsupported data type"}; + } + break; + case ElementwiseBinaryType::kMax: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + if (!haveSameQuantInfo(_lhs, _rhs, _output)) + { + throw std::runtime_error("Max NYI for quantized"); + } + _kernel = maximumGeneric<uint8_t>; + } + else if (_lhs->data_type() == OperandType::FLOAT32) + { + _kernel = maximumGeneric<float>; + } + else + { + throw std::runtime_error{"Max: unsupported data type"}; + } + break; + case ElementwiseBinaryType::kMin: + if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + if (!haveSameQuantInfo(_lhs, _rhs, _output)) + { + throw std::runtime_error("Min NYI for quantized"); + } + _kernel = minimumGeneric<uint8_t>; + } + else if (_lhs->data_type() == OperandType::INT32) + { + _kernel = minimumGeneric<int32_t>; + } + else if (_lhs->data_type() == OperandType::FLOAT32) + { + _kernel = minimumGeneric<float>; + } + else + { + throw std::runtime_error{"Min: unsupported data type"}; + } + break; + default: + throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"}; + } +} + +void ElementwiseBinaryLayer::run() { _kernel(_lhs, _rhs, _output); } + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.h b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h index ed8dc5b0f..052747a4c 100644 --- a/runtime/onert/backend/cpu/ops/MaxLayer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseBinaryLayer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__ #include <backend/IPortableTensor.h> @@ -30,20 +30,25 @@ namespace cpu namespace ops { -class MaxLayer : public ::onert::exec::IFunction +enum class ElementwiseBinaryType +{ + kLogicalAnd, + kLogicalOr, + kMax, + kMin, +}; + +class ElementwiseBinaryLayer : public ::onert::exec::IFunction { public: - MaxLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) + ElementwiseBinaryLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) { // DO NOTHING } public: - template <typename T> void maximum(); - - void maxQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output); + void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output, + const ElementwiseBinaryType op_type); void run() override; @@ -51,6 +56,7 @@ private: const IPortableTensor *_lhs; const IPortableTensor *_rhs; IPortableTensor *_output; + std::function<void(const IPortableTensor *, const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -58,4 +64,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_MAXLAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEBINARYLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc new file mode 100644 index 000000000..f8f89ab15 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.cc @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
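ElementwiseBinaryLayer above applies the same configure-time dispatch to Max, Min, and LogicalOr (kLogicalAnd is declared in the enum but has no kernel bound yet, so it falls into the default throw). Note that the quantized Max/Min paths require identical scale and zero point across both inputs and the output (haveSameQuantInfo), because cker's Max/Min compare raw uint8 values; mismatched quantization parameters still throw NYI. A usage sketch:

  ElementwiseBinaryLayer layer;
  layer.configure(lhs, rhs, output, ElementwiseBinaryType::kMax); // float inputs bind maximumGeneric<float>
  layer.run();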
+ */ + +#include "ElementwiseUnaryLayer.h" + +#include "OperationUtils.h" + +#include <cker/operation/Elementwise.h> +#include <cker/operation/Erf.h> +#include <cker/operation/Exp.h> +#include <cker/operation/LogicalNot.h> +#include <cker/operation/Quantize.h> +#include <cker/operation/Round.h> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ +void absFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Abs(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +template <typename FromT> +void castPtr(const FromT *in, DataPtr out, int num_elements, ir::DataType data_type_out) +{ + switch (data_type_out) + { + case ir::DataType::FLOAT32: + std::transform(in, in + num_elements, out.f, [](FromT a) { return static_cast<float>(a); }); + return; + case ir::DataType::INT32: + std::transform(in, in + num_elements, out.i32, + [](FromT a) { return static_cast<int32_t>(a); }); + return; + case ir::DataType::UINT32: + std::transform(in, in + num_elements, out.u32, + [](FromT a) { return static_cast<uint32_t>(a); }); + return; + case ir::DataType::UINT8: + std::transform(in, in + num_elements, out.u8, + [](FromT a) { return static_cast<uint8_t>(a); }); + return; + case ir::DataType::BOOL8: + std::transform(in, in + num_elements, out.b, [](FromT a) { return static_cast<bool>(a); }); + return; + case ir::DataType::INT64: + std::transform(in, in + num_elements, out.i64, + [](FromT a) { return static_cast<int64_t>(a); }); + return; + default: + throw std::runtime_error("Cast: Not supported output type " + + std::to_string((int)data_type_out)); + } +} + +void cast(const IPortableTensor *input, IPortableTensor *output) +{ + auto input_buf = input->buffer(); + auto output_buf = output->buffer(); + const auto in = *reinterpret_cast<const DataPtr *>(&input_buf); + auto out = *reinterpret_cast<DataPtr *>(&output_buf); + + auto input_shape = getTensorShape(input); + auto output_shape = getTensorShape(output); + const auto num_elements = MatchingFlatSize(input_shape, output_shape); + + switch (input->data_type()) + { + case ir::DataType::FLOAT32: + castPtr(in.f, out, num_elements, output->data_type()); + return; + case ir::DataType::INT32: + castPtr(in.i32, out, num_elements, output->data_type()); + return; + case ir::DataType::UINT32: + castPtr(in.u32, out, num_elements, output->data_type()); + return; + case ir::DataType::UINT8: + castPtr(in.u8, out, num_elements, output->data_type()); + return; + case ir::DataType::BOOL8: + castPtr(in.b, out, num_elements, output->data_type()); + return; + case ir::DataType::INT64: + castPtr(in.i64, out, num_elements, output->data_type()); + return; + default: + throw std::runtime_error("Cast: unsupported data type " + + std::to_string((int)input->data_type())); + } +} + +void cosFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Cos(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void expFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Exp(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void erfFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Erf(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), +
getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void logFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Log(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void logicalNot(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::LogicalNot(getTensorShape(input), reinterpret_cast<const bool *>(input->buffer()), + getTensorShape(output), reinterpret_cast<bool *>(output->buffer())); +} + +void negFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Neg(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +template <typename InputT, typename OutputT> +void affineQuantize(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Quantize(getTensorShape(input), reinterpret_cast<const InputT *>(input->buffer()), + getTensorShape(output), reinterpret_cast<OutputT *>(output->buffer()), + output->data_scale(), output->data_offset()); +} + +void roundFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Round(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void rsqrtFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Rsqrt(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +void sinFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + nnfw::cker::Sin(getTensorShape(input), reinterpret_cast<const float *>(input->buffer()), + getTensorShape(output), reinterpret_cast<float *>(output->buffer())); +} + +template <typename T> void zerosLikeFloat32(const IPortableTensor *input, IPortableTensor *output) +{ + if (!HaveSameShapes(input, output)) + throw std::runtime_error{"ZerosLike: input and output shape don't match."}; + + auto element_size = getTensorShape(input).FlatSize(); + + memset(reinterpret_cast<T *>(output->buffer()), 0, element_size * sizeof(T)); +} +} // namespace + +void ElementwiseUnaryLayer::configure(const IPortableTensor *input, IPortableTensor *output, + const ElementwiseUnaryType op_type) +{ + assert(input != nullptr); + assert(output != nullptr); + + _input = input; + _output = output; + + switch (op_type) + { + case ElementwiseUnaryType::kAbs: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = absFloat32; + } + else + { + throw std::runtime_error{"Abs: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kCast: + _kernel = cast; + break; + case ElementwiseUnaryType::kCos: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = cosFloat32; + } + else + { + throw std::runtime_error{"Cos: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kExp: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = expFloat32; + } + else + { + throw std::runtime_error{"Exp: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kErf: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = erfFloat32; + } + else + { + throw std::runtime_error{"Erf: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kLog: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = logFloat32; + } + else + { + throw
std::runtime_error{"Log: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kLogicalNot: + if ((input->data_type() == OperandType::BOOL8)) + { + _kernel = logicalNot; + } + else + { + throw std::runtime_error{"LogicalNot: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kNeg: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = negFloat32; + } + else + { + throw std::runtime_error{"Neg: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kQuantize: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = affineQuantize<float, uint8_t>; + } + else + { + throw std::runtime_error{"Quantize: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kRound: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = roundFloat32; + } + else + { + throw std::runtime_error{"Round: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kRSqrt: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = rsqrtFloat32; + } + else + { + throw std::runtime_error{"RSqrt: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kSin: + if ((input->data_type() == OperandType::FLOAT32)) + { + _kernel = sinFloat32; + } + else + { + throw std::runtime_error{"Sin: Unsupported data type"}; + } + break; + case ElementwiseUnaryType::kZerosLike: + if (input->data_type() == OperandType::FLOAT32) + { + _kernel = zerosLikeFloat32<float>; + } + else if (input->data_type() == OperandType::INT32) + { + _kernel = zerosLikeFloat32<int32_t>; + } + else + { + throw std::runtime_error{"ZerosLike: Unsupported data type"}; + } + break; + default: + throw std::runtime_error{"ElementwiseBinary: Unsupported ElementwiseBinary type"}; + } +} + +void ElementwiseUnaryLayer::run() { _kernel(_input, _output); } + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.h b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h index 994d17a30..74968386d 100644 --- a/runtime/onert/backend/cpu/ops/ReLU6Layer.h +++ b/runtime/onert/backend/cpu/ops/ElementwiseUnaryLayer.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ #include <backend/IPortableTensor.h> @@ -30,23 +30,41 @@ namespace cpu namespace ops { -class ReLU6Layer : public ::onert::exec::IFunction +enum class ElementwiseUnaryType { -public: - ReLU6Layer(); + kAbs, + kCast, + kCos, + kErf, + kExp, + kLog, + kLogicalNot, + kNeg, + kQuantize, + kRound, + kRSqrt, + kSin, + kZerosLike +}; +class ElementwiseUnaryLayer : public ::onert::exec::IFunction +{ public: - void relu6Float32(); + ElementwiseUnaryLayer() : _input(nullptr), _output(nullptr), _kernel() + { + // DO NOTHING + } - void relu6Quant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); +public: + void configure(const IPortableTensor *input, IPortableTensor *output, + const ElementwiseUnaryType op_type); void run() override; private: const IPortableTensor *_input; IPortableTensor *_output; + std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -54,4 +72,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_RELU6LAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_ELEMENTWISEUNARYLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.cc b/runtime/onert/backend/cpu/ops/ExpLayer.cc deleted file mode 100644 index 4dbec9cd5..000000000 --- a/runtime/onert/backend/cpu/ops/ExpLayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ExpLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Exp.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ExpLayer::ExpLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ExpLayer::expFloat32() -{ - nnfw::cker::Exp(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void ExpLayer::expQuant8() -{ - // cker quant8 exp is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ExpLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ExpLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - expFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - expQuant8(); - } - else - { - throw std::runtime_error{"Exp: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogLayer.cc b/runtime/onert/backend/cpu/ops/LogLayer.cc deleted file mode 100644 index 307c15bc4..000000000 --- a/runtime/onert/backend/cpu/ops/LogLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "LogLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogLayer::LogLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogLayer::logFloat32() -{ - nnfw::cker::Log(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void LogLayer::logQuant8() { throw std::runtime_error{"NYI"}; } - -void LogLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void LogLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - logFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - logQuant8(); - } - else - { - throw std::runtime_error{"Log: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogLayer.h b/runtime/onert/backend/cpu/ops/LogLayer.h deleted file mode 100644 index 2f6b4b570..000000000 --- a/runtime/onert/backend/cpu/ops/LogLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogLayer : public ::onert::exec::IFunction -{ -public: - LogLayer(); - -public: - void logFloat32(); - - void logQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc index 06dde4fc4..1d7ee6caa 100644 --- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.cc @@ -34,6 +34,16 @@ LogSoftMaxLayer::LogSoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0. 
// DO NOTHING } +void LogSoftMaxLayer::PopulateLookupTable(const float kBeta) +{ + const float scale = -_input->data_scale() * kBeta; + const int32_t max_uint8 = std::numeric_limits<uint8_t>::max(); + for (int32_t val = 0; val <= max_uint8; ++val) + { + _table[max_uint8 - val] = expf(scale * val); + } +} + void LogSoftMaxLayer::logsoftmaxFloat32() { nnfw::cker::SoftmaxParams op_params; @@ -46,7 +56,15 @@ void LogSoftMaxLayer::logsoftmaxFloat32() void LogSoftMaxLayer::logsoftmaxQuant8() { - // NYI + nnfw::cker::SoftmaxParams op_params; + op_params.beta = _beta; + op_params.axis = _axis; + op_params.table = _table; + op_params.zero_point = _output->data_offset(); + op_params.scale = _output->data_scale(); + nnfw::cker::LogSoftmax(op_params, _input->data_scale(), getTensorShape(_input), + reinterpret_cast<const uint8_t *>(_input->buffer()), + getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); } void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, const int axis, @@ -56,6 +74,10 @@ void LogSoftMaxLayer::configure(const IPortableTensor *input, const float beta, _output = output; _beta = beta; _axis = axis; + if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + PopulateLookupTable(_beta); + } } void LogSoftMaxLayer::run() @@ -66,7 +88,7 @@ void LogSoftMaxLayer::run() } else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - throw std::runtime_error{"LogSoftmax : NYI"}; + logsoftmaxQuant8(); } else { diff --git a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h index ba9deca17..1533f3361 100644 --- a/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h +++ b/runtime/onert/backend/cpu/ops/LogSoftMaxLayer.h @@ -45,12 +45,15 @@ public: void run(); + void PopulateLookupTable(const float kBeta); + private: const IPortableTensor *_input; IPortableTensor *_output; float _beta; int _axis; + float _table[256]; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc b/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc deleted file mode 100644 index f2192c148..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
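PopulateLookupTable above fills _table so that _table[255 - d] == expf(-beta * input_scale * d) for every possible quantized distance d from a row's maximum; the quantized log-softmax can then sum exponentials by table lookup alone. A sketch of how such a table is typically consumed (the actual loop lives in nnfw::cker::LogSoftmax):

    #include <algorithm>
    #include <cstdint>

    // Sum exp(-beta * scale * (max - x_i)) over a row using the 256-entry table,
    // where table[255 - d] holds expf(-beta * scale * d) as built above.
    float sumExp(const uint8_t *row, int len, const float table[256])
    {
      uint8_t max_val = 0;
      for (int i = 0; i < len; ++i)
        max_val = std::max(max_val, row[i]);
      float sum = 0.f;
      for (int i = 0; i < len; ++i)
        sum += table[255 - (max_val - row[i])];
      return sum;
    }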
- */ - -#include "LogicalNotLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/LogicalNot.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogicalNotLayer::LogicalNotLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogicalNotLayer::logicalNotBool8() -{ - nnfw::cker::LogicalNot(getTensorShape(_input), reinterpret_cast<const bool *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<bool *>(_output->buffer())); -} - -void LogicalNotLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void LogicalNotLayer::run() -{ - if (_input->data_type() == OperandType::BOOL8) - { - logicalNotBool8(); - } - else - { - throw std::runtime_error{"LogicalNot: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h b/runtime/onert/backend/cpu/ops/LogicalNotLayer.h deleted file mode 100644 index 5543cca3d..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalNotLayer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogicalNotLayer : public ::onert::exec::IFunction -{ -public: - LogicalNotLayer(); - -public: - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void logicalNotBool8(); - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGICALNOTLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc b/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc deleted file mode 100644 index 5b7c9f6f0..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "LogicalOrLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/LogicalOr.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -void LogicalOrLayer::lorBool8() -{ - if (!HaveSameShapes(_lhs, _rhs)) - { - nnfw::cker::LogicalOrBroadcast<bool>( - getTensorShape(_lhs), reinterpret_cast<const bool *>(_lhs->buffer()), getTensorShape(_rhs), - reinterpret_cast<const bool *>(_rhs->buffer()), getTensorShape(_output), - reinterpret_cast<bool *>(_output->buffer())); - } - else - { - nnfw::cker::LogicalOrElementwise<bool>(getTensorShape(_lhs), - reinterpret_cast<const bool *>(_lhs->buffer()), - reinterpret_cast<const bool *>(_rhs->buffer()), - reinterpret_cast<bool *>(_output->buffer())); - } -} - -void LogicalOrLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void LogicalOrLayer::run() -{ - if ((_lhs->data_type() == OperandType::BOOL8) && (_rhs->data_type() == OperandType::BOOL8)) - { - lorBool8(); - } - else - { - throw std::runtime_error{"LogicalOr: Unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h b/runtime/onert/backend/cpu/ops/LogicalOrLayer.h deleted file mode 100644 index efaf396e8..000000000 --- a/runtime/onert/backend/cpu/ops/LogicalOrLayer.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class LogicalOrLayer : public ::onert::exec::IFunction -{ -public: - LogicalOrLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // Nothing - } - -public: - void configure(const IPortableTensor *_lhs, const IPortableTensor *_rhs, IPortableTensor *output); - - void run() override; - -private: - void lorBool8(); - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGICAL_OR_LAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.cc b/runtime/onert/backend/cpu/ops/LogisticLayer.cc deleted file mode 100644 index 140ab4d2c..000000000 --- a/runtime/onert/backend/cpu/ops/LogisticLayer.cc +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "LogisticLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Logistic.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -LogisticLayer::LogisticLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void LogisticLayer::populateLookupTable() -{ - const auto input_scale = static_cast<double>(_input->data_scale()); - const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); - const auto output_scale = static_cast<double>(_output->data_scale()); - const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); - const float inverse_scale = 1 / output_scale; - int32_t maxval = std::numeric_limits<uint8_t>::max(); - int32_t minval = std::numeric_limits<uint8_t>::min(); - for (int32_t val = minval; val <= maxval; ++val) - { - const float dequantized = input_scale * (val - input_zero_point); - const float transformed = 1.0f / (1.0f + std::exp(-dequantized)); - const float rescaled = std::round(transformed * inverse_scale); - const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); - _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); - } -} - -void LogisticLayer::logisticFloat32() -{ - nnfw::cker::Logistic(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void LogisticLayer::logisticQuant8() -{ - const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output)); - const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer()); - uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer()); - - for (int i = 0; i < size; ++i) - { - output_data[i] = _table[input_data[i]]; - } -} - -void LogisticLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; - - if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - if (_output->data_scale() != 1.f / 256) - { - throw std::runtime_error{"incorrect scale for output"}; - } - populateLookupTable(); - } -} - -void LogisticLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - logisticFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - logisticQuant8(); - } - else - { - throw std::runtime_error{"Logistic: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/LogisticLayer.h b/runtime/onert/backend/cpu/ops/LogisticLayer.h deleted file mode 100644 index cac77939d..000000000 --- a/runtime/onert/backend/cpu/ops/LogisticLayer.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class LogisticLayer : public ::onert::exec::IFunction -{ -public: - LogisticLayer(); - -public: - void logisticFloat32(); - - void logisticQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - void populateLookupTable(); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; - - uint8_t _table[256]; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_LOGISTICLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/MaxLayer.cc b/runtime/onert/backend/cpu/ops/MaxLayer.cc deleted file mode 100644 index 9631983be..000000000 --- a/runtime/onert/backend/cpu/ops/MaxLayer.cc +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
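The deleted LogisticLayer built its uint8 sigmoid the way the surviving table-based kernels do: enumerate all 256 input codes once, dequantize, apply the function, requantize, and clamp. A self-contained worked instance; the input scale and zero point are made-up values, the 1/256 output scale is the one configure() enforced, and the output zero point is assumed to be 0:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
      const float in_scale = 0.1f;        // made-up input quantization scale
      const int32_t in_zero = 128;        // made-up input zero point
      const float out_scale = 1.f / 256;  // scale required by LogisticLayer::configure
      const int32_t out_zero = 0;         // assumed output zero point
      uint8_t table[256];
      for (int32_t v = 0; v <= 255; ++v)
      {
        const float x = in_scale * (v - in_zero);    // dequantize
        const float s = 1.f / (1.f + std::exp(-x));  // sigmoid
        const int32_t q = static_cast<int32_t>(std::round(s / out_scale)) + out_zero;
        table[v] = static_cast<uint8_t>(std::min(255, std::max(0, q)));
      }
      std::printf("sigmoid(0) -> %u (expect 128)\n", table[128]);  // 0.5 / (1/256) = 128
      return 0;
    }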
- */ - -#include "MaxLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/MaxMin.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -template <typename T> void MaxLayer::maximum() -{ - nnfw::cker::Max<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<T *>(_output->buffer())); -} - -void MaxLayer::maxQuant8() -{ - if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale()) - { - if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset()) - { - return nnfw::cker::Max<uint8_t>( - getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - } - } - throw std::runtime_error("Max NYI for quantized"); -} - -void MaxLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void MaxLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - maximum<float>(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - maxQuant8(); - } - else - { - throw std::runtime_error{"Max: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc b/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc deleted file mode 100644 index 1e983b408..000000000 --- a/runtime/onert/backend/cpu/ops/MaxPoolLayer.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "MaxPoolLayer.h" - -#include <cker/operation/MaxPool.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -#define MAXPOOLING_PARAMETERS \ - nnfw::cker::PoolParams op_params; \ - op_params.stride_height = _strideHeight; \ - op_params.stride_width = _strideWidth; \ - op_params.filter_height = _kernelHeight; \ - op_params.filter_width = _kernelWidth; \ - op_params.padding_values.height = (int8_t)_paddingTop; \ - op_params.padding_values.width = (int8_t)_paddingLeft; - -MaxPoolLayer::MaxPoolLayer() - : _input(nullptr), _output(nullptr), _paddingLeft(0), _paddingTop(0), _paddingRight(0), - _paddingBottom(0), _strideWidth(0), _strideHeight(0), _kernelWidth(0), _kernelHeight(0), - _activation(ir::Activation::NONE) -{ - // DO NOTHING -} - -void MaxPoolLayer::maxPoolFloat32() -{ - MAXPOOLING_PARAMETERS - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - nnfw::cker::MaxPool(op_params, getTensorShape(_input), - reinterpret_cast<const float *>(_input->buffer()), getTensorShape(_output), - reinterpret_cast<float *>(_output->buffer())); -} -void MaxPoolLayer::maxPoolQuant8() -{ - MAXPOOLING_PARAMETERS - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - nnfw::cker::MaxPool(op_params, getTensorShape(_input), - reinterpret_cast<const uint8_t *>(_input->buffer()), getTensorShape(_output), - reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void MaxPoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, - const uint32_t paddingRight, const uint32_t paddingTop, - const uint32_t paddingBottom, const uint32_t strideWidth, - const uint32_t strideHeight, const uint32_t kernelWidth, - const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output) -{ - _input = input; - _paddingLeft = paddingLeft; - _paddingRight = paddingRight; - _paddingTop = paddingTop; - _paddingBottom = paddingBottom; - _strideWidth = strideWidth; - _strideHeight = strideHeight; - _kernelWidth = kernelWidth; - _kernelHeight = kernelHeight; - _activation = activation; - _output = output; -} - -void MaxPoolLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - maxPoolFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - maxPoolQuant8(); - } - else - { - throw std::runtime_error{"MaxPool: unsupported data type"}; - } -} - -#undef MAXPOOLING_PARAMETERS - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MinLayer.cc b/runtime/onert/backend/cpu/ops/MinLayer.cc deleted file mode 100644 index 20859673b..000000000 --- a/runtime/onert/backend/cpu/ops/MinLayer.cc +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MinLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/MaxMin.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -template <typename T> void MinLayer::minimum() -{ - nnfw::cker::Min<T>(getTensorShape(_lhs), reinterpret_cast<const T *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const T *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<T *>(_output->buffer())); -} - -void MinLayer::minQuant8() -{ - if (_lhs->data_scale() == _rhs->data_scale() && _lhs->data_scale() == _output->data_scale()) - { - if (_lhs->data_offset() == _rhs->data_offset() && _lhs->data_offset() == _output->data_offset()) - { - return nnfw::cker::Min<uint8_t>( - getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - } - } - throw std::runtime_error("Min NYI for quantized"); -} - -void MinLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - IPortableTensor *output) -{ - assert(lhs != nullptr); - assert(rhs != nullptr); - assert(output != nullptr); - - _lhs = lhs; - _rhs = rhs; - _output = output; -} - -void MinLayer::run() -{ - if (_lhs->data_type() == OperandType::FLOAT32) - { - minimum<float>(); - } - else if (_lhs->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - minQuant8(); - } - else if (_lhs->data_type() == OperandType::INT32) - { - minimum<int32_t>(); - } - else - { - throw std::runtime_error{"Min: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MinLayer.h b/runtime/onert/backend/cpu/ops/MinLayer.h deleted file mode 100644 index 9bd114e54..000000000 --- a/runtime/onert/backend/cpu/ops/MinLayer.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class MinLayer : public ::onert::exec::IFunction -{ -public: - MinLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - template <typename T> void minimum(); - - void minQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_MINLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/MulLayer.cc b/runtime/onert/backend/cpu/ops/MulLayer.cc deleted file mode 100644 index eef73edf3..000000000 --- a/runtime/onert/backend/cpu/ops/MulLayer.cc +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "MulLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void MulLayer::mulFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void MulLayer::mulQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - - double 
real_multiplier = _lhs->data_scale() * _rhs->data_scale() / _output->data_scale(); - QuantizeMultiplier(real_multiplier, &op_params.output_multiplier, &op_params.output_shift); - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::MUL>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void MulLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void MulLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - mulFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - mulQuant8(); - } - else - { - throw std::runtime_error{"Mul: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MulLayer.h b/runtime/onert/backend/cpu/ops/MulLayer.h deleted file mode 100644 index 2c4a98875..000000000 --- a/runtime/onert/backend/cpu/ops/MulLayer.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
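mulQuant8 above collapses the three tensor scales into one real multiplier, lhs_scale * rhs_scale / output_scale, and QuantizeMultiplier splits it into a Q31 integer multiplier plus a power-of-two shift so the inner loop stays integer-only. A simplified version of that decomposition; the real cker helper also handles zero and other edge cases:

    #include <cmath>
    #include <cstdint>

    // Split m > 0 into q * 2^shift, with q a Q31 fixed-point value in [2^30, 2^31).
    void quantizeMultiplier(double m, int32_t *q, int *shift)
    {
      const double frac = std::frexp(m, shift);  // m == frac * 2^shift, frac in [0.5, 1)
      int64_t q64 = static_cast<int64_t>(std::round(frac * (1ll << 31)));
      if (q64 == (1ll << 31))  // frac rounded all the way up to 1.0
      {
        q64 /= 2;
        ++*shift;
      }
      *q = static_cast<int32_t>(q64);
    }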
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class MulLayer : public ::onert::exec::IFunction -{ -public: - MulLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void mulFloat32(); - - void mulQuant8(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_MULLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/NegLayer.cc b/runtime/onert/backend/cpu/ops/NegLayer.cc deleted file mode 100644 index 2cb95b771..000000000 --- a/runtime/onert/backend/cpu/ops/NegLayer.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "NegLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -NegLayer::NegLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void NegLayer::negFloat32() -{ - nnfw::cker::Neg(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void NegLayer::negQuant8() { throw std::runtime_error{"NYI"}; } - -void NegLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void NegLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - negFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - negQuant8(); - } - else - { - throw std::runtime_error{"Neg: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/NegLayer.h b/runtime/onert/backend/cpu/ops/NegLayer.h deleted file mode 100644 index addf84ec2..000000000 --- a/runtime/onert/backend/cpu/ops/NegLayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in riting, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class NegLayer : public ::onert::exec::IFunction -{ -public: - NegLayer(); - -public: - void negFloat32(); - - void negQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_NEGLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/PoolLayer.cc b/runtime/onert/backend/cpu/ops/PoolLayer.cc new file mode 100644 index 000000000..85d02a751 --- /dev/null +++ b/runtime/onert/backend/cpu/ops/PoolLayer.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "PoolLayer.h" + +#include <cker/operation/AveragePool.h> +#include <cker/operation/MaxPool.h> + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace cpu +{ +namespace ops +{ + +namespace +{ +template <typename T> +void avgPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input, + IPortableTensor *output) +{ + nnfw::cker::AveragePool<T>(params, getTensorShape(input), + reinterpret_cast<const T *>(input->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); +} + +template <typename T> +void maxPool2D(const nnfw::cker::PoolParams &params, const IPortableTensor *input, + IPortableTensor *output) +{ + nnfw::cker::MaxPool<T>(params, getTensorShape(input), + reinterpret_cast<const T *>(input->buffer()), getTensorShape(output), + reinterpret_cast<T *>(output->buffer())); +} + +template <typename T> +std::function<void(const IPortableTensor *, IPortableTensor *)> +generateKernelGeneric(const nnfw::cker::PoolParams &params, PoolType op_type) +{ + if (op_type == PoolType::kAvg) + { + return std::bind(&avgPool2D<T>, params, std::placeholders::_1, std::placeholders::_2); + } + else if (op_type == PoolType::kMax) + { + return std::bind(&maxPool2D<T>, params, std::placeholders::_1, std::placeholders::_2); + } + else + { + throw std::runtime_error{"Pool: unsupported pool type"}; + } +} +} // namespace + +PoolLayer::PoolLayer() : _input(nullptr), _output(nullptr), _kernel() +{ + // DO NOTHING +} + +#define POOLING_PARAMETERS \ + nnfw::cker::PoolParams op_params; \ + op_params.stride_height = strideHeight; \ + op_params.stride_width = strideWidth; \ + op_params.filter_height = kernelHeight; \ + op_params.filter_width = kernelWidth; \ + op_params.padding_values.height = (int8_t)paddingTop; \ + op_params.padding_values.width = (int8_t)paddingLeft; + +void PoolLayer::configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t, + const uint32_t paddingTop, const uint32_t, const uint32_t strideWidth, + const uint32_t strideHeight, const uint32_t kernelWidth, + const uint32_t kernelHeight, const ir::Activation activation, + IPortableTensor *output, const PoolType op_type) +{ + assert(input != nullptr); + assert(output != nullptr); + + _input = input; + _output = output; + + POOLING_PARAMETERS + if (_input->data_type() == OperandType::FLOAT32) + { + float output_activation_min = 0; + float output_activation_max = 0; + CalculateActivationRange<float>(activation, &output_activation_min, &output_activation_max); + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + _kernel = generateKernelGeneric<float>(op_params, op_type); + } + else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) + { + int32_t output_activation_min = 0; + int32_t output_activation_max = 0; + CalculateActivationRangeUint8(activation, _output, &output_activation_min, + &output_activation_max); + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + _kernel = generateKernelGeneric<uint8_t>(op_params, op_type); + } + else + { + throw std::runtime_error{"Pool: unsupported data type"}; + } +} + +void PoolLayer::run() { _kernel(_input, _output); } + +#undef POOLING_PARAMETERS + +} // namespace ops +} // namespace cpu +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h b/runtime/onert/backend/cpu/ops/PoolLayer.h index 4c5109f64..b37835946 100644 ---
a/runtime/onert/backend/cpu/ops/MaxPoolLayer.h +++ b/runtime/onert/backend/cpu/ops/PoolLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ #include <backend/IPortableTensor.h> #include "OperationUtils.h" @@ -31,22 +31,25 @@ namespace cpu namespace ops { -class MaxPoolLayer : public ::onert::exec::IFunction +enum class PoolType { -public: - MaxPoolLayer(); + kAvg, + kL2, + kMax, +}; +class PoolLayer : public ::onert::exec::IFunction +{ public: - void maxPoolFloat32(); - - void maxPoolQuant8(); + PoolLayer(); +public: void configure(const IPortableTensor *input, const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop, const uint32_t paddingBottom, const uint32_t strideWidth, const uint32_t strideHeight, const uint32_t kernelWidth, const uint32_t kernelHeight, const ir::Activation activation, - IPortableTensor *output); + IPortableTensor *output, const PoolType op_type); void run() override; @@ -54,17 +57,7 @@ private: const IPortableTensor *_input; IPortableTensor *_output; - uint32_t _paddingLeft; - uint32_t _paddingTop; - uint32_t _paddingRight; - uint32_t _paddingBottom; - - uint32_t _strideWidth; - uint32_t _strideHeight; - uint32_t _kernelWidth; - uint32_t _kernelHeight; - - ir::Activation _activation; + std::function<void(const IPortableTensor *, IPortableTensor *)> _kernel; }; } // namespace ops @@ -72,4 +65,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_MAXPOOLLAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_POOLLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc b/runtime/onert/backend/cpu/ops/QuantizeLayer.cc deleted file mode 100644 index 45fc148bf..000000000 --- a/runtime/onert/backend/cpu/ops/QuantizeLayer.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
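generateKernelGeneric in the new PoolLayer.cc freezes the computed PoolParams into the stored kernel with std::bind; a value-capturing lambda expresses the same design and is the more common modern idiom. A sketch with stand-in types:

    #include <functional>

    struct PoolParams { int stride_w = 1, stride_h = 1; /* ... */ };
    struct Tensor {};

    template <typename T> void maxPool(const PoolParams &, const Tensor *, Tensor *)
    {
      // pooling over the buffers (omitted)
    }

    std::function<void(const Tensor *, Tensor *)> makeKernel(const PoolParams &params)
    {
      // Capturing params by value fixes them at configure() time, exactly like
      // std::bind(&maxPool2D<T>, params, _1, _2) in the layer above.
      return [params](const Tensor *in, Tensor *out) { maxPool<float>(params, in, out); };
    }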
- */ - -#include "QuantizeLayer.h" - -#include <cker/operation/Quantize.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -QuantizeLayer::QuantizeLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -template <typename InputT, typename OutputT> void QuantizeLayer::affineQuantize() -{ - nnfw::cker::Quantize(getTensorShape(_input), reinterpret_cast<const InputT *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<OutputT *>(_output->buffer()), - _output->data_scale(), _output->data_offset()); -} - -void QuantizeLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void QuantizeLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - affineQuantize<float, uint8_t>(); - } - else - { - throw std::runtime_error{"Quantize: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/QuantizeLayer.h b/runtime/onert/backend/cpu/ops/QuantizeLayer.h deleted file mode 100644 index b4e7aca40..000000000 --- a/runtime/onert/backend/cpu/ops/QuantizeLayer.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class QuantizeLayer : public ::onert::exec::IFunction -{ -public: - QuantizeLayer(); - -public: - template <typename InputT, typename OutputT> void affineQuantize(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_QUANTIZELAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.cc b/runtime/onert/backend/cpu/ops/RankLayer.cc index 185d7554e..4690bdf72 100644 --- a/runtime/onert/backend/cpu/ops/RoundLayer.cc +++ b/runtime/onert/backend/cpu/ops/RankLayer.cc @@ -14,12 +14,10 @@ * limitations under the License. 
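The deleted QuantizeLayer was a thin wrapper over cker's affine quantizer and only supported float32 to uint8. For reference, the transform it applied is q = clamp(round(x / scale) + zero_point, 0, 255); a self-contained sketch of that math (my own reference code, not the cker implementation):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Affine (asymmetric) quantization of one float to uint8, given the output
// tensor's scale and zero point - the operation QuantizeLayer delegated to cker.
uint8_t affineQuantize(float x, float scale, int32_t zero_point)
{
  const int32_t q = static_cast<int32_t>(std::round(x / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
  // scale = 0.5, zero point = 128: 3.0 / 0.5 + 128 = 134
  std::cout << static_cast<int>(affineQuantize(3.0f, 0.5f, 128)) << '\n';
}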
*/ -#include "RoundLayer.h" +#include "RankLayer.h" #include "OperationUtils.h" -#include <cker/operation/Round.h> - namespace onert { namespace backend @@ -28,32 +26,28 @@ namespace cpu { namespace ops { -RoundLayer::RoundLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} -void RoundLayer::roundFloat32() +RankLayer::RankLayer() : _input(nullptr), _output(nullptr) { - nnfw::cker::Round(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); + // DO NOTHING } -void RoundLayer::configure(const IPortableTensor *input, IPortableTensor *output) +void RankLayer::configure(const IPortableTensor *input, IPortableTensor *output) { _input = input; _output = output; } -void RoundLayer::run() +void RankLayer::run() { - if (_input->data_type() == OperandType::FLOAT32) + if (_input->data_type() == OperandType::FLOAT32 || _input->data_type() == OperandType::INT32) { - roundFloat32(); + int32_t *output_data = reinterpret_cast<int32_t *>(_output->buffer()); + output_data[0] = _input->num_dimensions(); } else { - throw std::runtime_error{"Round: unsupported data type"}; + throw std::runtime_error{"Rank : unsupported data type"}; } } diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h b/runtime/onert/backend/cpu/ops/RankLayer.h index 054894203..6282ceb07 100644 --- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.h +++ b/runtime/onert/backend/cpu/ops/RankLayer.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ +#ifndef __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ +#define __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ #include <backend/IPortableTensor.h> @@ -29,11 +29,13 @@ namespace cpu { namespace ops { -class ZerosLikeLayer : public ::onert::exec::IFunction + +class RankLayer : public ::onert::exec::IFunction { public: - ZerosLikeLayer(); + RankLayer(); +public: void configure(const IPortableTensor *input, IPortableTensor *output); void run() override; @@ -48,4 +50,4 @@ private: } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_ZEROS_LIKE_LAYER_H__ +#endif // __ONERT_BACKEND_CPU_OPS_RANKLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc b/runtime/onert/backend/cpu/ops/ReLU6Layer.cc deleted file mode 100644 index 26eb35e0d..000000000 --- a/runtime/onert/backend/cpu/ops/ReLU6Layer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ReLU6Layer.h" - -#include "OperationUtils.h" - -#include <cker/operation/ReLU6.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ReLU6Layer::ReLU6Layer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ReLU6Layer::relu6Float32() -{ - nnfw::cker::ReLU6(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - reinterpret_cast<float *>(_output->buffer())); -} - -void ReLU6Layer::relu6Quant8() -{ - // cker quant8 relu is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ReLU6Layer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ReLU6Layer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - relu6Float32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - relu6Quant8(); - } - else - { - throw std::runtime_error{"ReLU6: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.cc b/runtime/onert/backend/cpu/ops/ReLULayer.cc deleted file mode 100644 index cb4529feb..000000000 --- a/runtime/onert/backend/cpu/ops/ReLULayer.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ReLULayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/ReLU.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -ReLULayer::ReLULayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ReLULayer::reluFloat32() -{ - nnfw::cker::ReLU(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void ReLULayer::reluQuant8() -{ - // cker quant8 relu is not implemented yet - throw std::runtime_error{"NYI"}; -} - -void ReLULayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ReLULayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - reluFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - reluQuant8(); - } - else - { - throw std::runtime_error{"ReLU: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ReLULayer.h b/runtime/onert/backend/cpu/ops/ReLULayer.h deleted file mode 100644 index 4ba2be772..000000000 --- a/runtime/onert/backend/cpu/ops/ReLULayer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
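ReLULayer and ReLU6Layer are deleted here with no replacement in this directory; judging from the visitor lists later in this diff (ElementwiseActivation in StaticShapeInference/DynamicShapeInference), these activations appear to have been folded into a generic elementwise-activation op. Both are clamps, which is what makes that consolidation natural; a sketch:

#include <algorithm>
#include <iostream>
#include <limits>

// relu(x) = max(x, 0); relu6(x) = min(max(x, 0), 6).
// One clamp with configurable bounds covers both.
float clampActivation(float x, float lo, float hi) { return std::min(hi, std::max(lo, x)); }

int main()
{
  const float inf = std::numeric_limits<float>::infinity();
  std::cout << clampActivation(7.5f, 0.0f, 6.0f) << '\n'; // ReLU6: prints 6
  std::cout << clampActivation(-1.5f, 0.0f, inf) << '\n'; // ReLU:  prints 0
}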
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class ReLULayer : public ::onert::exec::IFunction -{ -public: - ReLULayer(); - -public: - void reluFloat32(); - - void reluQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_RELULAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.cc b/runtime/onert/backend/cpu/ops/ReduceLayer.cc index fe22dbed7..bb5f85d60 100644 --- a/runtime/onert/backend/cpu/ops/ReduceLayer.cc +++ b/runtime/onert/backend/cpu/ops/ReduceLayer.cc @@ -49,27 +49,31 @@ void evalLogic(const IPortableTensor *input, IPortableTensor *output, const std: } template <typename T> -void evalType(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes, - bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +evalType(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (reduce_type) { case ReduceType::kSum: - return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(0), reduce_kernel, - [](const T current, const T in) -> T { return in + current; }); + return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, static_cast<T>(0), reduce_kernel, + [](const T current, const T in) -> T { return in + current; }); break; case ReduceType::kProd: - return evalLogic<T>(input, output, axes, keep_dims, static_cast<T>(1), reduce_kernel, - [](const T current, const T in) -> T { return in * current; }); + return std::bind(&evalLogic<T>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, static_cast<T>(1), reduce_kernel, + [](const T current, const T in) -> T { return in * current; }); break; case ReduceType::kMax: - return evalLogic<T>( - input, output, axes, keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel, + return std::bind( + &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + keep_dims, std::numeric_limits<T>::lowest(), reduce_kernel, [](const T current, const T in) -> T { return (in > current) ? in : current; }); break; case ReduceType::kMin: - return evalLogic<T>( - input, output, axes, keep_dims, std::numeric_limits<T>::max(), reduce_kernel, + return std::bind( + &evalLogic<T>, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, + keep_dims, std::numeric_limits<T>::max(), reduce_kernel, [](const T current, const T in) -> T { return (in < current) ? 
in : current; }); break; default: @@ -79,44 +83,44 @@ void evalType(const IPortableTensor *input, IPortableTensor *output, const std:: // Template specialization for bool type template <> -void evalType<bool>(const IPortableTensor *input, IPortableTensor *output, - const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel, - ReduceType reduce_type) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +evalType<bool>(bool keep_dims, nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (reduce_type) { case ReduceType::kAny: - return evalLogic<bool>( - input, output, axes, keep_dims, false, reduce_kernel, - [](const bool current, const bool in) -> bool { return in || current; }); + return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, false, reduce_kernel, + [](const bool current, const bool in) -> bool { return in || current; }); break; case ReduceType::kAll: - return evalLogic<bool>( - input, output, axes, keep_dims, true, reduce_kernel, - [](const bool current, const bool in) -> bool { return in && current; }); + return std::bind(&evalLogic<bool>, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, true, reduce_kernel, + [](const bool current, const bool in) -> bool { return in && current; }); break; default: throw std::runtime_error{"Reduce: Unsupported reduce type"}; } } -template <ReduceType reduce_type> -void evalGeneric(const IPortableTensor *input, IPortableTensor *output, - const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel) +std::function<void(const IPortableTensor *, IPortableTensor *, const std::vector<int> &)> +generateKernelGeneric(const IPortableTensor *input, bool keep_dims, + nnfw::cker::Reduce &reduce_kernel, ReduceType reduce_type) { switch (input->data_type()) { case OperandType::FLOAT32: - return evalType<float>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<float>(keep_dims, reduce_kernel, reduce_type); case OperandType::INT32: - return evalType<int32_t>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<int32_t>(keep_dims, reduce_kernel, reduce_type); case OperandType::BOOL8: - return evalType<bool>(input, output, axes, keep_dims, reduce_kernel, reduce_type); + return evalType<bool>(keep_dims, reduce_kernel, reduce_type); default: throw std::runtime_error{"Reduce(generic): unsupported data type"}; } } +// TODO Refine this function void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output, const std::vector<int> &axes, bool keep_dims, nnfw::cker::Reduce &reduce_kernel) @@ -146,14 +150,15 @@ void evalSumQuantized(const IPortableTensor *input, IPortableTensor *output, return; } - evalGeneric<ReduceType::kSum>(input, output, axes, keep_dims, reduce_kernel); + const auto kernel = generateKernelGeneric(input, keep_dims, reduce_kernel, ReduceType::kSum); + kernel(input, output, axes); } } // namespace ReduceLayer::ReduceLayer() - : _input(nullptr), _axes(nullptr), _output(nullptr), _reduceType(ReduceType::kAny), - _keep_dims(false), _reduce_kernel(new nnfw::cker::Reduce()) + : _input(nullptr), _axes(nullptr), _output(nullptr), _reduce_kernel(new nnfw::cker::Reduce()), + _kernel() { // DO NOTHING } @@ -166,43 +171,44 @@ void ReduceLayer::configure(const IPortableTensor *input, const IPortableTensor _input = input; _axes = axes; _output = output; - _reduceType = reduceType; - _keep_dims = keep_dims; -} -void 
ReduceLayer::run() -{ - const auto axes = getReducerAxes(_axes); - switch (_reduceType) + switch (reduceType) { case ReduceType::kSum: if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) { - evalSumQuantized(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = std::bind(&evalSumQuantized, std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, keep_dims, *_reduce_kernel); return; } - evalGeneric<ReduceType::kSum>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kSum); break; case ReduceType::kProd: - evalGeneric<ReduceType::kProd>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kProd); break; case ReduceType::kMax: - evalGeneric<ReduceType::kMax>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMax); break; case ReduceType::kMin: - evalGeneric<ReduceType::kMin>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kMin); break; case ReduceType::kAny: - evalGeneric<ReduceType::kAny>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAny); break; case ReduceType::kAll: - evalGeneric<ReduceType::kAll>(_input, _output, axes, _keep_dims, *_reduce_kernel); + _kernel = generateKernelGeneric(_input, keep_dims, *_reduce_kernel, ReduceType::kAll); break; default: throw std::runtime_error{"Reduce: Unsupported reduce type"}; } } +void ReduceLayer::run() +{ + const auto axes = getReducerAxes(_axes); + _kernel(_input, _output, axes); +} + } // namespace ops } // namespace cpu } // namespace backend diff --git a/runtime/onert/backend/cpu/ops/ReduceLayer.h b/runtime/onert/backend/cpu/ops/ReduceLayer.h index 8e7bcdb07..332d399bd 100644 --- a/runtime/onert/backend/cpu/ops/ReduceLayer.h +++ b/runtime/onert/backend/cpu/ops/ReduceLayer.h @@ -65,10 +65,11 @@ private: const IPortableTensor *_input; const IPortableTensor *_axes; IPortableTensor *_output; - ReduceType _reduceType; - bool _keep_dims; std::unique_ptr<nnfw::cker::Reduce> _reduce_kernel; + std::function<void(const IPortableTensor *input, IPortableTensor *output, + const std::vector<int> &axes)> + _kernel; }; } // namespace ops diff --git a/runtime/onert/backend/cpu/ops/RoundLayer.h b/runtime/onert/backend/cpu/ops/RoundLayer.h deleted file mode 100644 index fc6a46c0d..000000000 --- a/runtime/onert/backend/cpu/ops/RoundLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
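ReduceLayer gets the same treatment as PoolLayer: configure() resolves the reduce type and keep_dims once into a stored std::function, and run() only fetches the (possibly dynamic) axes and invokes it, keeping the type switch out of the per-inference path. A minimal sketch of that configure/run split (illustrative names only):

#include <functional>
#include <iostream>
#include <vector>

// configure() time: choose the reduction once.
std::function<int(const std::vector<int> &)> makeKernel(bool use_max)
{
  if (use_max)
    return [](const std::vector<int> &v) {
      int m = v.front();
      for (int x : v)
        m = x > m ? x : m;
      return m;
    };
  return [](const std::vector<int> &v) {
    int s = 0;
    for (int x : v)
      s += x;
    return s;
  };
}

int main()
{
  auto kernel = makeKernel(false);        // one-time dispatch
  std::cout << kernel({1, 2, 3}) << '\n'; // run(): prints 6
  std::cout << kernel({4, 5}) << '\n';    // run(): prints 9
}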
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class RoundLayer : public ::onert::exec::IFunction -{ -public: - RoundLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void roundFloat32(); - -private: - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_ROUNDLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc b/runtime/onert/backend/cpu/ops/RsqrtLayer.cc deleted file mode 100644 index 0bd468f96..000000000 --- a/runtime/onert/backend/cpu/ops/RsqrtLayer.cc +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "RsqrtLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -RsqrtLayer::RsqrtLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void RsqrtLayer::rsqrtFloat32() -{ - nnfw::cker::Rsqrt(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void RsqrtLayer::rsqrtQuant8() { throw std::runtime_error{"NYI: QASYMM8 not supported"}; } - -void RsqrtLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void RsqrtLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - rsqrtFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - rsqrtQuant8(); - } - else - { - throw std::runtime_error{"Rsqrt: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/RsqrtLayer.h b/runtime/onert/backend/cpu/ops/RsqrtLayer.h deleted file mode 100644 index 49abbb08d..000000000 --- a/runtime/onert/backend/cpu/ops/RsqrtLayer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class RsqrtLayer : public ::onert::exec::IFunction -{ -public: - RsqrtLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void rsqrtFloat32(); - void rsqrtQuant8(); - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_RSQRTLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/SinLayer.cc b/runtime/onert/backend/cpu/ops/SinLayer.cc deleted file mode 100644 index 2a6b11753..000000000 --- a/runtime/onert/backend/cpu/ops/SinLayer.cc +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "SinLayer.h" -#include "OperationUtils.h" - -#include <cker/operation/Elementwise.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -SinLayer::SinLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void SinLayer::sinFloat32() -{ - nnfw::cker::Sin(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void SinLayer::sinQuant8() { throw std::runtime_error{"NYI"}; } - -void SinLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void SinLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - sinFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - sinQuant8(); - } - else - { - throw std::runtime_error{"Sin: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/SinLayer.h b/runtime/onert/backend/cpu/ops/SinLayer.h deleted file mode 100644 index 348350f41..000000000 --- a/runtime/onert/backend/cpu/ops/SinLayer.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ - -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class SinLayer : public ::onert::exec::IFunction -{ -public: - SinLayer(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; - -private: - void sinFloat32(); - void sinQuant8(); - - const IPortableTensor *_input; - IPortableTensor *_output; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_SINLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc index 6e2bb584a..095e67abc 100644 --- a/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc +++ b/runtime/onert/backend/cpu/ops/SoftMaxLayer.cc @@ -34,55 +34,23 @@ SoftMaxLayer::SoftMaxLayer() : _input(nullptr), _output(nullptr), _beta(0.0) // DO NOTHING } -// Performs softmax along the input of size (input_size * batch_size). -void Softmax(const float *in, const int input_size, const int batch_size, const float beta, - float *out) +void SoftMaxLayer::softmaxFloat32() { - assert(input_size > 0); - - // For each batch - for (int b = 0; b < batch_size; b++) + if (getNumberOfDimensions(_input) == 1) { - // Find the max coeff. - float max_coeff = in[0]; - for (int i = 1; i < input_size; i++) - { - if (in[i] > max_coeff) - max_coeff = in[i]; - } - - // Compute the normalized sum of exps. - float exp_sum = 0.0; - for (int i = 0; i < input_size; i++) - { - out[i] = std::exp((in[i] - max_coeff) * beta); - exp_sum += out[i]; - } - - // Divide by the sum of exps. - float reciprocal_sum_exp = 1.f / exp_sum; - for (int i = 0; i < input_size; i++) - { - out[i] *= reciprocal_sum_exp; - } - - // Advance in and out pointers for the next batch. - in += input_size; - out += input_size; + uint32_t input_size = getNumberOfElements(_input); + nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, 1, _beta, + reinterpret_cast<float *>(_output->buffer())); } -} - -void SoftMaxLayer::softmaxFloat32() -{ - if (getNumberOfDimensions(_input) == 2) + else if (getNumberOfDimensions(_input) == 2) { uint32_t batch_size = getSizeOfDimension(_input, 0); if (batch_size == 0) throw std::runtime_error("batch_size should not be 0"); uint32_t input_size = getNumberOfElements(_input) / batch_size; - Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, _beta, - reinterpret_cast<float *>(_output->buffer())); + nnfw::cker::Softmax(reinterpret_cast<const float *>(_input->buffer()), input_size, batch_size, + _beta, reinterpret_cast<float *>(_output->buffer())); } else if (getNumberOfDimensions(_input) == 4) { @@ -94,7 +62,7 @@ void SoftMaxLayer::softmaxFloat32() } else { - throw std::runtime_error{"only 2D and 4D tensors supported"}; + throw std::runtime_error{"only 1D, 2D and 4D tensors supported"}; } } diff --git a/runtime/onert/backend/cpu/ops/SubLayer.cc b/runtime/onert/backend/cpu/ops/SubLayer.cc deleted file mode 100644 index 597d52952..000000000 --- a/runtime/onert/backend/cpu/ops/SubLayer.cc +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
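SoftMaxLayer now forwards the 1D and 2D float cases to nnfw::cker::Softmax instead of the hand-rolled loop deleted above (a 1D input is treated as a single batch). The deleted loop is the standard numerically stable softmax: subtract the per-batch max before exponentiating so std::exp cannot overflow, then normalize by the sum. A compact standalone version of that reference algorithm:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// softmax_i = exp((x_i - max) * beta) / sum_j exp((x_j - max) * beta)
std::vector<float> softmax(const std::vector<float> &in, float beta)
{
  const float max_coeff = *std::max_element(in.begin(), in.end());
  std::vector<float> out(in.size());
  float exp_sum = 0.0f;
  for (size_t i = 0; i < in.size(); ++i)
  {
    out[i] = std::exp((in[i] - max_coeff) * beta);
    exp_sum += out[i];
  }
  for (float &v : out)
    v /= exp_sum; // normalize
  return out;
}

int main()
{
  for (float v : softmax({1.0f, 2.0f, 3.0f}, 1.0f))
    std::cout << v << ' '; // ~0.090 0.245 0.665
  std::cout << '\n';
}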
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "SubLayer.h" - -#include <cker/operation/BinaryArithmeticOps.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -void SubLayer::subFloat32() -{ - float output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.float_activation_max = output_activation_max; - op_params.float_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const float *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const float *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void SubLayer::subInt32() -{ - int32_t output_activation_min = 0, output_activation_max = 0; - CalculateActivationRange(_activation, &output_activation_min, &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const int32_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const int32_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<int32_t *>(_output->buffer())); -} - -void SubLayer::subQuant8() -{ - int32_t output_activation_min, output_activation_max; - CalculateActivationRangeUint8(_activation, _output, &output_activation_min, - &output_activation_max); - nnfw::cker::BinaryArithmeticOpParam op_params; - op_params.quantized_activation_max = output_activation_max; - op_params.quantized_activation_min = output_activation_min; - // Parameters for scaled quantized computation - op_params.left_shift = 20; - // Zero-points of input and output tensors - op_params.input1_offset = -_lhs->data_offset(); - op_params.input2_offset = -_rhs->data_offset(); - op_params.output_offset = _output->data_offset(); - 
assert((op_params.input1_offset >= 0) && (op_params.input1_offset <= 255)); - assert((op_params.input2_offset >= 0) && (op_params.input2_offset <= 255)); - assert((op_params.output_offset >= 0) && (op_params.output_offset <= 255)); - - // Compute normalized scale for _lhs and _rhs values, - // and represent in 32-bit fixed point - const double norm_max_scale = 2 * std::max(_lhs->data_scale(), _rhs->data_scale()); - const double real_lhs_scale = _lhs->data_scale() / norm_max_scale; - const double real_rhs_scale = _rhs->data_scale() / norm_max_scale; - // output scale is used to normalize final result, so we invert the scale here - const double real_output_scale = - norm_max_scale / (_output->data_scale() * (1 << op_params.left_shift)); - - // Represent the scales as fixed int32_t multipliers, and int32_t shifts - QuantizeMultiplier(real_lhs_scale, &op_params.input1_multiplier, &op_params.input1_shift); - QuantizeMultiplier(real_rhs_scale, &op_params.input2_multiplier, &op_params.input2_shift); - op_params.input2_multiplier *= -1; - QuantizeMultiplier(real_output_scale, &op_params.output_multiplier, &op_params.output_shift); - - const bool need_broadcast = - nnfw::cker::ProcessBroadcastShapes(getTensorShape(_lhs), getTensorShape(_rhs), &op_params); - if (need_broadcast) - { - nnfw::cker::BroadcastBinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); - return; - } - - nnfw::cker::BinaryArithmeticOp<nnfw::cker::BinaryArithmeticOpType::SUB>( - op_params, getTensorShape(_lhs), reinterpret_cast<const uint8_t *>(_lhs->buffer()), - getTensorShape(_rhs), reinterpret_cast<const uint8_t *>(_rhs->buffer()), - getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer())); -} - -void SubLayer::configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output) -{ - _lhs = lhs; - _rhs = rhs; - _activation = activation; - _output = output; -} - -void SubLayer::run() -{ - if (_output->data_type() == OperandType::FLOAT32) - { - subFloat32(); - } - else if (_output->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - subQuant8(); - } - else if (_output->data_type() == OperandType::INT32) - { - subInt32(); - } - else - { - throw std::runtime_error{"Sub: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/SubLayer.h b/runtime/onert/backend/cpu/ops/SubLayer.h deleted file mode 100644 index 86f32ca6d..000000000 --- a/runtime/onert/backend/cpu/ops/SubLayer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
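The deleted subQuant8 follows the usual TFLite-style integer arithmetic recipe: both uint8 inputs are rescaled to a shared scale (twice the larger input scale) using 32-bit fixed-point multipliers, subtracted, then rescaled to the output scale; left_shift = 20 preserves precision in the intermediate sums, and negating input2's multiplier (op_params.input2_multiplier *= -1) turns the shared quantized-add path into a subtraction. QuantizeMultiplier and friends are onert/cker internals, but the float computation the fixed-point path approximates is simple to state:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Float-domain reference for quantized subtraction: dequantize both inputs,
// subtract, requantize. subQuant8's fixed-point code approximates exactly this.
uint8_t subQuantRef(uint8_t a, float a_scale, int32_t a_zp, uint8_t b, float b_scale, int32_t b_zp,
                    float out_scale, int32_t out_zp)
{
  const float real = a_scale * (a - a_zp) - b_scale * (b - b_zp);
  const int32_t q = static_cast<int32_t>(std::round(real / out_scale)) + out_zp;
  return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}

int main()
{
  // 0.5 - 0.25 = 0.25 in real values; requantized with scale 0.0625 and
  // zero point 128: 0.25 / 0.0625 + 128 = 132
  std::cout << static_cast<int>(subQuantRef(130, 0.25f, 128, 129, 0.25f, 128, 0.0625f, 128))
            << '\n';
}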
- */ - -#ifndef __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ - -#include <backend/IPortableTensor.h> -#include "OperationUtils.h" - -#include <exec/IFunction.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -class SubLayer : public ::onert::exec::IFunction -{ -public: - SubLayer() : _lhs(nullptr), _rhs(nullptr), _output(nullptr) - { - // DO NOTHING - } - -public: - void subFloat32(); - - void subQuant8(); - - void subInt32(); - - void configure(const IPortableTensor *lhs, const IPortableTensor *rhs, - const ir::Activation activation, IPortableTensor *output); - - void run() override; - -private: - const IPortableTensor *_lhs; - const IPortableTensor *_rhs; - IPortableTensor *_output; - - ir::Activation _activation{ir::Activation::NONE}; -}; - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_CPU_OPS_SUBLAYER_H__ diff --git a/runtime/onert/backend/cpu/ops/TanhLayer.cc b/runtime/onert/backend/cpu/ops/TanhLayer.cc deleted file mode 100644 index 910ac1f41..000000000 --- a/runtime/onert/backend/cpu/ops/TanhLayer.cc +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "TanhLayer.h" - -#include "OperationUtils.h" - -#include <cker/operation/Tanh.h> - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ - -TanhLayer::TanhLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void TanhLayer::PopulateLookupTable() -{ - const auto input_scale = static_cast<double>(_input->data_scale()); - const auto input_zero_point = static_cast<int32_t>(_input->data_offset()); - const auto output_scale = static_cast<double>(_output->data_scale()); - const auto output_zero_point = static_cast<int32_t>(_output->data_offset()); - const float inverse_scale = 1 / output_scale; - int32_t maxval = std::numeric_limits<uint8_t>::max(); - int32_t minval = std::numeric_limits<uint8_t>::min(); - for (int32_t val = minval; val <= maxval; ++val) - { - const float dequantized = input_scale * (val - input_zero_point); - const float transformed = std::tanh(dequantized); - const float rescaled = std::round(transformed * inverse_scale); - const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point); - _table[val] = static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)); - } -} - -void TanhLayer::tanhFloat32() -{ - nnfw::cker::Tanh(getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()), - getTensorShape(_output), reinterpret_cast<float *>(_output->buffer())); -} - -void TanhLayer::tanhQuant8() -{ - const int size = MatchingFlatSize(getTensorShape(_input), getTensorShape(_output)); - const uint8_t *input_data = reinterpret_cast<const uint8_t *>(_input->buffer()); - uint8_t *output_data = reinterpret_cast<uint8_t *>(_output->buffer()); - - for (int i = 0; i < size; ++i) - { - output_data[i] = _table[input_data[i]]; - } -} - -void TanhLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; - if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - PopulateLookupTable(); - } -} - -void TanhLayer::run() -{ - if (_input->data_type() == OperandType::FLOAT32) - { - tanhFloat32(); - } - else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM) - { - tanhQuant8(); - } - else - { - throw std::runtime_error{"Tanh: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc b/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc deleted file mode 100644 index ae8084518..000000000 --- a/runtime/onert/backend/cpu/ops/ZerosLikeLayer.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
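The deleted quantized tanh path builds a 256-entry lookup table once in configure(): every possible uint8 input value is dequantized, run through std::tanh, and requantized, so run() is a single table lookup per element. A standalone sketch of the same LUT construction (the quantization parameters below are illustrative, not taken from the diff):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
  const float in_scale = 0.05f;
  const int32_t in_zp = 128;
  const float out_scale = 1.0f / 128.0f; // tanh outputs lie in [-1, 1]
  const int32_t out_zp = 128;

  uint8_t table[256];
  for (int32_t val = 0; val <= 255; ++val)
  {
    const float dequantized = in_scale * (val - in_zp);
    const float transformed = std::tanh(dequantized);
    const int32_t quantized = static_cast<int32_t>(std::round(transformed / out_scale)) + out_zp;
    table[val] = static_cast<uint8_t>(std::min(255, std::max(0, quantized)));
  }

  // run() then reduces to: output[i] = table[input[i]];
  std::cout << static_cast<int>(table[128]) << '\n'; // tanh(0) maps to the zero point, 128
}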
- */ - -#include "ZerosLikeLayer.h" - -#include "OperationUtils.h" - -namespace onert -{ -namespace backend -{ -namespace cpu -{ -namespace ops -{ -ZerosLikeLayer::ZerosLikeLayer() : _input(nullptr), _output(nullptr) -{ - // DO NOTHING -} - -void ZerosLikeLayer::configure(const IPortableTensor *input, IPortableTensor *output) -{ - _input = input; - _output = output; -} - -void ZerosLikeLayer::run() -{ - if (!HaveSameShapes(_input, _output)) - throw std::runtime_error{"ZerosLike: input and output shape don't match."}; - - auto element_size = getTensorShape(_input).FlatSize(); - - switch (_input->data_type()) - { - case OperandType::FLOAT32: - memset(reinterpret_cast<float *>(_output->buffer()), 0, element_size * sizeof(float)); - break; - case OperandType::INT32: - memset(reinterpret_cast<int32_t *>(_output->buffer()), 0, element_size * sizeof(int32_t)); - break; - default: - throw std::runtime_error{"ZerosLike: unsupported data type"}; - } -} - -} // namespace ops -} // namespace cpu -} // namespace backend -} // namespace onert diff --git a/runtime/onert/core/include/backend/BackendContext.h b/runtime/onert/core/include/backend/BackendContext.h index c263aef2b..1eba29550 100644 --- a/runtime/onert/core/include/backend/BackendContext.h +++ b/runtime/onert/core/include/backend/BackendContext.h @@ -29,6 +29,7 @@ class Backend; class IConstantInitializer; class IKernelGenerator; class ITensorRegister; +struct ITensorRegistry; struct ITensorBuilder; struct IOptimizer; @@ -45,14 +46,15 @@ public: public: BackendContext(const Backend *backend, const ir::Graph *graph, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, std::shared_ptr<ITensorBuilder> tensor_builder = nullptr, std::shared_ptr<IConstantInitializer> constant_initializer = nullptr, std::shared_ptr<IKernelGenerator> kernel_gen = nullptr, std::shared_ptr<ITensorRegister> tensor_register = nullptr, std::shared_ptr<IOptimizer> optimizer = nullptr) - : _backend{backend}, _graph{graph}, tensor_builder{tensor_builder}, - constant_initializer{constant_initializer}, kernel_gen{kernel_gen}, - tensor_register{tensor_register}, optimizer{optimizer} + : _backend{backend}, _graph{graph}, tensor_registry{tensor_registry}, + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, + kernel_gen{kernel_gen}, tensor_register{tensor_register}, optimizer{optimizer} { } @@ -74,6 +76,7 @@ private: std::vector<ir::OperandIndex> _operand_list; public: + std::shared_ptr<ITensorRegistry> tensor_registry; std::shared_ptr<ITensorBuilder> tensor_builder; std::shared_ptr<IConstantInitializer> constant_initializer; std::shared_ptr<IKernelGenerator> kernel_gen; diff --git a/runtime/onert/core/include/backend/IConstantInitializer.h b/runtime/onert/core/include/backend/IConstantInitializer.h index f322015ba..149acecb4 100644 --- a/runtime/onert/core/include/backend/IConstantInitializer.h +++ b/runtime/onert/core/include/backend/IConstantInitializer.h @@ -162,14 +162,14 @@ public: public: void run() { - assert(tensor_builder().get()); + assert(tensor_registry()); for (const auto &it : _init_map) { const auto &ind = it.first; const auto &fn = it.second; const auto &model_obj = _operands.at(ind); - auto tensor_obj = tensor_builder()->tensorAt(ind); + auto tensor_obj = tensor_registry()->getNativeITensor(ind); assert(tensor_obj != nullptr); fn(model_obj, *tensor_obj); VERBOSE(FillOperandData) << "Fill data for operand " << ind.value() << std::endl; @@ -189,10 +189,7 @@ public: void setLayout(ir::Layout layout) { _current_op_seq_layout = 
layout; } protected: - using OperationVisitor::visit; - -protected: - virtual std::shared_ptr<ITensorBuilder> tensor_builder() const = 0; + virtual std::shared_ptr<ITensorRegistry> tensor_registry() const = 0; public: virtual void registerDefaultInitializer(const ir::OperandIndex &index, const ir::Operand &obj) diff --git a/runtime/onert/core/include/backend/ITensorBuilder.h b/runtime/onert/core/include/backend/ITensorBuilder.h index b760cda0e..f93ab81ae 100644 --- a/runtime/onert/core/include/backend/ITensorBuilder.h +++ b/runtime/onert/core/include/backend/ITensorBuilder.h @@ -40,11 +40,6 @@ struct ITensorBuilder virtual ~ITensorBuilder(void) = default; /** - * @brief Returns true if this TensorBuilder support dynamic tensor - */ - virtual bool supportDynamicTensor() = 0; - - /** * @brief Register tensor information to allocate on backend * * @param ind Index * @param info Information * @param backend_layout Backend layout * @param as_const Whether this tensor is constant */ virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, ir::Layout backend_layout) = 0; /** * @brief Check if the tensor has been registered with @c registerTensorInfo * * @return true If the tensor has been registered * @return false Otherwise */ virtual bool isRegistered(const ir::OperandIndex &) const = 0; - /** - * @brief Get tensor registry - * - * @return std::shared_ptr<backend::ITensorRegistry> tensor registry object - * - * @note Backend should implement this when it has StaticTensorManager and DynamicTensorManager - */ - virtual std::shared_ptr<backend::ITensorRegistry> tensorRegistry() = 0; - public: // methods for static tensor allocation /** * @brief Let the tensor builder know first use(start of lifetime) of a tensor * Must be called before calling @c prepare * Must be called only once for each tensor before calling @c allocate * * @param ind Operand index */ virtual void notifyFirstUse(const ir::OperandIndex &) = 0; /** * @brief Let the tensor builder know last use(end of lifetime) of a tensor * Must be called only once for each tensor before calling @c allocate * * @param ind Operand index */ virtual void notifyLastUse(const ir::OperandIndex &) = 0; /** * @brief Prepare the tensors * Before calling this, it must be ready to set how tensors would be used * */ virtual void prepare(void) = 0; /** * @brief Allocate the tensors * Before calling this, @c prepare must be called * */ virtual void allocate() = 0; /** * @brief Some actions after functions' @c IFunction::prepare method. * This is called right after each function's @c IFunction::prepare function has been * called. */ virtual void postFunctionPrepare() = 0; /** - * @brief Get the tensor object - * - * @param ind Index of the tensor - * @return std::shared_ptr<ITensor> The tensor object - */ - virtual std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) = 0; - - /** - * @brief Set the migrant tensor object - * - * @return true if succeeded - * @return false if failed or unsupported - */ - virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &) - { - return false; - } - - /** - * @brief Iterate over tensors - * - * @param fn The function to be run - */ - virtual void iterate(const IterateFunction &fn) = 0; - - /** * @brief Release static @c ITensorManager object which was built * Before calling this, @c allocate must have been called * * @return std::unique_ptr<ITensorManager> Tensor Manager object */ virtual std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) = 0; public: // methods for dynamic tensor allocation /** * @brief Get dynamicTensorManager. If a backend does not support dynamic tensor, exception * will be thrown. * * @return IDynamicTensorManager* DynamicTensorManager * @note Since it is a pointer, its lifetime is from the creation of TensorBuilder * to the end of execution */ - virtual IDynamicTensorManager *dynamicTensorManager(void) - { - throw std::runtime_error("dynamicTensorManager(): NYI"); - } + virtual IDynamicTensorManager *dynamicTensorManager(void) { return nullptr; } /** * @brief Release dynamic @c ITensorManager object which was built * Before calling this, @c allocate must have been called * * @return std::unique_ptr<ITensorManager> Tensor Manager object */ - virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) - { - throw std::runtime_error("releaseDynamicTensorManager() for this backend is not supported"); - } + virtual std::unique_ptr<ITensorManager> releaseDynamicTensorManager(void) { return nullptr; } }; } // namespace backend diff --git a/runtime/onert/core/include/backend/ITensorRegistry.h b/runtime/onert/core/include/backend/ITensorRegistry.h index 855513124..88fcb0fcd 100644 --- a/runtime/onert/core/include/backend/ITensorRegistry.h +++ b/runtime/onert/core/include/backend/ITensorRegistry.h @@ -21,6 +21,7 @@ #include "ir/Index.h" #include "backend/ITensor.h" +#include "backend/IPortableTensor.h" namespace onert { 
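Note the contract change above: dynamicTensorManager() and releaseDynamicTensorManager() now return nullptr for backends without dynamic-tensor support instead of throwing, so callers must null-check. A tiny runnable analog of the new convention (hypothetical types, not onert's):

#include <iostream>

struct DynamicTensorManager { /* ... */ };

struct TensorBuilder
{
  // New contract: absence of dynamic-tensor support is signaled by nullptr,
  // not by a runtime_error as before.
  virtual DynamicTensorManager *dynamicTensorManager() { return nullptr; }
  virtual ~TensorBuilder() = default;
};

int main()
{
  TensorBuilder builder;
  if (DynamicTensorManager *mgr = builder.dynamicTensorManager())
    std::cout << "dynamic tensors supported\n";
  else
    std::cout << "no dynamic tensor support\n"; // printed
}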
@@ -51,13 +52,22 @@ struct ITensorRegistry * @note Returned tensor cannot be used longer than dynamic tensor manager */ virtual std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &) = 0; + /** + * @brief Set a migrant tensor, which comes from another backend + * + * @return true if supported + * @return false if not supported + */ + virtual bool setMigrantTensor(const ir::OperandIndex &, const std::shared_ptr<IPortableTensor> &) + { + return false; + } }; } // namespace backend } // namespace onert #include "ir/OperandIndexMap.h" -#include "backend/IPortableTensor.h" namespace onert { @@ -108,24 +118,23 @@ public: return nullptr; } - bool setMigrantTensor(const ir::OperandIndex &ind, const std::shared_ptr<IPortableTensor> &tensor) + bool setMigrantTensor(const ir::OperandIndex &ind, + const std::shared_ptr<IPortableTensor> &tensor) override { - // TODO Uncomment this as two tensors for an index is not allowed. - // But now it is temporarily allowed as a workaround. External one hides Managed one. - // auto itr = _native.find(ind); - // if (itr != _native.end() && itr->second != nullptr && tensor != nullptr) - // throw std::runtime_error{ - // "Tried to set an migrant tensor but an native tensor already exists."}; + assert(tensor != nullptr); + auto itr = _native.find(ind); + if (itr != _native.end()) + throw std::runtime_error{"Tried to set a migrant tensor but a native tensor already exists."}; _migrant[ind] = tensor; return true; } void setNativeTensor(const ir::OperandIndex &ind, const std::shared_ptr<T_Tensor> &tensor) { + assert(tensor != nullptr); auto itr = _migrant.find(ind); - if (itr != _migrant.end() && itr->second != nullptr && tensor != nullptr) - throw std::runtime_error{ - "Tried to set a native tensor but an migrant tensor already exists."}; + if (itr != _migrant.end()) + throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."}; _native[ind] = tensor; } diff --git a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h index a7e034a91..3f09b7a4a 100644 --- a/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h +++ b/runtime/onert/core/include/backend/cpu_common/StaticTensorManager.h @@ -20,6 +20,7 @@ #include "MemoryManager.h" #include "backend/IStaticTensorManager.h" +#include "backend/IDynamicTensorManager.h" #include "ir/OperandIndexMap.h" #include "ir/OperandInfo.h" #include "TensorRegistry.h" @@ -34,7 +35,8 @@ namespace cpu_common class StaticTensorManager : public backend::IStaticTensorManager { public: - StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg); + StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, + IDynamicTensorManager *dynamic_tensor_manager); virtual ~StaticTensorManager() = default; void allocateConsts(void); @@ -55,6 +57,7 @@ private: std::unique_ptr<MemoryManager> _nonconst_mgr; const std::shared_ptr<TensorRegistry> _tensors; ir::OperandIndexMap<bool> _as_constants; + IDynamicTensorManager *_dynamic_tensor_manager; }; } // namespace cpu_common diff --git a/runtime/onert/core/include/compiler/LoweredGraph.h b/runtime/onert/core/include/compiler/LoweredGraph.h new file mode 100644 index 000000000..aadba6857 --- /dev/null +++ b/runtime/onert/core/include/compiler/LoweredGraph.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
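PortableTensorRegistryTemplate now enforces the native/migrant exclusivity that was previously commented out as a workaround: registering one kind of tensor for an index that already holds the other throws. A reduced model of that invariant (tensors simplified to ints, not onert types):

#include <iostream>
#include <map>
#include <stdexcept>

// An index may hold either a "native" entry (owned by this backend) or a
// "migrant" one (shared from another backend), never both.
struct Registry
{
  std::map<int, int> native, migrant;

  void setNative(int ind, int t)
  {
    if (migrant.count(ind))
      throw std::runtime_error{"Tried to set a native tensor but a migrant tensor already exists."};
    native[ind] = t;
  }

  void setMigrant(int ind, int t)
  {
    if (native.count(ind))
      throw std::runtime_error{"Tried to set a migrant tensor but a native tensor already exists."};
    migrant[ind] = t;
  }
};

int main()
{
  Registry r;
  r.setNative(0, 42);
  try
  {
    r.setMigrant(0, 7); // violates the invariant
  }
  catch (const std::exception &e)
  {
    std::cout << e.what() << '\n';
  }
}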
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_IR_LOWERED_GRAPH_H__ +#define __ONERT_IR_LOWERED_GRAPH_H__ + +#include "ir/Graph.h" +#include "ir/LowerInfoMap.h" +#include "ir/OpSequences.h" +#include "compiler/BackendResolver.h" +#include "compiler/Compiler.h" + +namespace onert +{ +namespace compiler +{ + +/** + * @brief Class that contains lowering information on a graph. + * In addition, after lowering, operands in the graph are marked "dynamic" + * if the output shape of an operation cannot be decided at compilation time. + */ +class LoweredGraph +{ +public: + LoweredGraph(const ir::Graph &graph, const compiler::CompilerOptions &options); + + ir::Graph &graph() { return _graph; } + const ir::Graph &graph() const { return _graph; } + const ir::LowerInfoMap *getLowerInfo() const { return &_lower_info_map; } + const ir::operation::LowerInfo *getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const; + void setLowerInfo(const ir::OpSequenceIndex &op_seq_index, + std::unique_ptr<ir::operation::LowerInfo> &&lower_info); + void removeLowerInfo(const ir::OpSequenceIndex &op_seq_index); + const ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index) const; + ir::operand::LowerInfo *getLowerInfo(const ir::OperandIndex &index); + void setLowerInfo(const ir::OperandIndex &index, + std::unique_ptr<ir::operand::LowerInfo> &&lower_info); + void removeLowerInfo(const ir::OperandIndex &index); + ir::OpSequences &op_seqs() { return _op_seqs; } + const ir::OpSequences &op_seqs() const { return _op_seqs; } + void iterateTopolOpSeqs( + const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const; + void + iterateTopolOpSeqs(const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn); + const backend::BackendContexts &backend_contexts() { return _backend_contexts; } + const backend::BackendContexts &backend_contexts() const { return _backend_contexts; } + std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; } + +private: + void + makeOpSequences(ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info, + const compiler::CompilerOptions &options, + const compiler::BackendResolver &backend_resolver); + + void manipulateLowerInfo( + ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info, + bool is_primary); + void dumpLowerInfo(); + bool mergeable(const ir::OpSequenceIndex &op_seq_index, const ir::OperationIndex &node_index, + ir::Layout layout, const compiler::BackendResolver &backend_resolver); + ir::OpSequenceIndex appendFreshSingleOpSequence(const ir::OperationIndex &node_index, + const ir::Operation &node); + +private: + ir::Graph _graph; + backend::BackendContexts _backend_contexts; + std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; + ir::LowerInfoMap _lower_info_map; + // A Pass (e.g. for Perm) can accept only a Graph, so Graph keeps OpSequences as a member + ir::OpSequences _op_seqs; +}; + +} // 
namespace compiler +} // namespace onert + +#endif // __ONERT_IR_LOWERED_GRAPH_H__ diff --git a/runtime/onert/core/include/compiler/StaticShapeInference.h b/runtime/onert/core/include/compiler/StaticShapeInference.h index bff68c9fa..b97cb5b7b 100644 --- a/runtime/onert/core/include/compiler/StaticShapeInference.h +++ b/runtime/onert/core/include/compiler/StaticShapeInference.h @@ -19,7 +19,7 @@ #include "ir/OperationVisitor.h" #include "ir/OpSequence.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" #include "ir/Index.h" #include <memory> @@ -41,7 +41,8 @@ class StaticShapeInferer : public ir::OperationVisitor public: StaticShapeInferer( const ir::SubgraphIndex &subg_idx, - const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> &lowered_subgs) + const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> + &lowered_subgs) : _lowered_subgs(lowered_subgs), _operands(lowered_subgs.at(subg_idx)->graph().operands()), _operations(lowered_subgs.at(subg_idx)->graph().operations()), _return_has_dynamic_tensor(false) @@ -57,54 +58,34 @@ public: * @param op_seq sequence of operations * @return @c true if op_seq's input or output has any dynamic tensor; @c false otherwise. */ - bool infer(const ir::OpSequence &op_seq) - { - bool has_dynamic_tensor = false; - - _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit() - - for (const auto &operation_idx : op_seq.operations()) - { - _operations.at(operation_idx).accept(*this); - - has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor; - } - - return has_dynamic_tensor; - } + bool infer(const ir::OpSequence &op_seq); void dump(); private: + bool checkDynamicInput(const ir::Operation &op); + void setDynamicOutput(const ir::Operation &op); + +private: // TODO Define visitors for operations. List them in alphabetic order. 
- void visit(const ir::operation::Abs &op) override; - void visit(const ir::operation::Add &op) override; void visit(const ir::operation::ArgMax &op) override; void visit(const ir::operation::BatchMatMul &op) override; + void visit(const ir::operation::BinaryArithmetic &op) override; void visit(const ir::operation::BroadcastTo &op) override; - void visit(const ir::operation::Cast &op) override; void visit(const ir::operation::Comparison &op) override; void visit(const ir::operation::Concat &op) override; void visit(const ir::operation::Conv2D &op) override; - void visit(const ir::operation::Cos &op) override; - void visit(const ir::operation::Div &op) override; - void visit(const ir::operation::Exp &op) override; + void visit(const ir::operation::ElementwiseActivation &op) override; + void visit(const ir::operation::ElementwiseBinary &op) override; + void visit(const ir::operation::ElementwiseUnary &op) override; void visit(const ir::operation::ExpandDims &op) override; void visit(const ir::operation::Fill &op) override; void visit(const ir::operation::FullyConnected &op) override; void visit(const ir::operation::FusedBatchNorm &op) override; void visit(const ir::operation::Gather &op) override; void visit(const ir::operation::If &op) override; - void visit(const ir::operation::Log &op) override; - void visit(const ir::operation::LogicalNot &op) override; - void visit(const ir::operation::LogicalOr &op) override; - void visit(const ir::operation::Logistic &op) override; void visit(const ir::operation::L2Normalization &op) override; void visit(const ir::operation::MatrixBandPart &op) override; - void visit(const ir::operation::Max &op) override; - void visit(const ir::operation::Min &op) override; - void visit(const ir::operation::Mul &op) override; - void visit(const ir::operation::Neg &op) override; void visit(const ir::operation::OneHot &op) override; void visit(const ir::operation::Pack &op) override; void visit(const ir::operation::Pad &op) override; @@ -113,27 +94,21 @@ private: void visit(const ir::operation::Range &op) override; void visit(const ir::operation::Reduce &op) override; void visit(const ir::operation::Reshape &op) override; - void visit(const ir::operation::Round &op) override; - void visit(const ir::operation::RSQRT &op) override; void visit(const ir::operation::ResizeBilinear &op) override; void visit(const ir::operation::Reverse &op) override; void visit(const ir::operation::Select &op) override; void visit(const ir::operation::Shape &op) override; - void visit(const ir::operation::Sin &op) override; void visit(const ir::operation::Slice &op) override; void visit(const ir::operation::Softmax &op) override; void visit(const ir::operation::SpaceToBatchND &op) override; void visit(const ir::operation::Split &op) override; void visit(const ir::operation::Squeeze &op) override; void visit(const ir::operation::StridedSlice &op) override; - void visit(const ir::operation::Sub &op) override; void visit(const ir::operation::SquaredDifference &op) override; - void visit(const ir::operation::Tanh &op) override; void visit(const ir::operation::Tile &op) override; void visit(const ir::operation::Transpose &op) override; void visit(const ir::operation::Unpack &op) override; void visit(const ir::operation::While &op) override; - void visit(const ir::operation::ZerosLike &op) override; private: /** @@ -149,7 +124,8 @@ private: void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx); private: - const std::unordered_map<ir::SubgraphIndex, 
std::unique_ptr<ir::LoweredGraph>> &_lowered_subgs; + const std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> + &_lowered_subgs; // _operands and _operations can be changed by controlflow operation ir::Operands &_operands; // operands of current subgraph ir::Operations &_operations; // operations of current subgraph diff --git a/runtime/onert/core/include/exec/DynamicShapeInference.h b/runtime/onert/core/include/exec/DynamicShapeInference.h index bca80db09..6f6659659 100644 --- a/runtime/onert/core/include/exec/DynamicShapeInference.h +++ b/runtime/onert/core/include/exec/DynamicShapeInference.h @@ -38,46 +38,34 @@ namespace exec class DynamicShapeInferer : public ir::OperationVisitor { public: - DynamicShapeInferer(const ir::Operands &operands, backend::IDynamicTensorManager *tensor_manager, + DynamicShapeInferer(const ir::Operands &operands, const std::shared_ptr<backend::ITensorRegistry> &tensor_registry) - : _operands(operands), _dynamic_tensor_manager(tensor_manager), - _tensor_registry(tensor_registry) + : _operands(operands), _tensor_registry(tensor_registry) { UNUSED_RELEASE(_operands); - UNUSED_RELEASE(_dynamic_tensor_manager); UNUSED_RELEASE(_tensor_registry); } public: // TODO Define visitors for operations. List them in alphabetic order. // Remove TODO when any op starting from the alphabet is added - void visit(const ir::operation::Abs &op) override; - void visit(const ir::operation::Add &op) override; void visit(const ir::operation::ArgMax &op) override; void visit(const ir::operation::BatchMatMul &op) override; + void visit(const ir::operation::BinaryArithmetic &op) override; void visit(const ir::operation::BroadcastTo &op) override; - void visit(const ir::operation::Cast &op) override; void visit(const ir::operation::Comparison &op) override; void visit(const ir::operation::Concat &op) override; void visit(const ir::operation::Conv2D &op) override; - void visit(const ir::operation::Cos &op) override; - void visit(const ir::operation::Div &op) override; - void visit(const ir::operation::Exp &op) override; + void visit(const ir::operation::ElementwiseActivation &op) override; + void visit(const ir::operation::ElementwiseBinary &op) override; + void visit(const ir::operation::ElementwiseUnary &op) override; void visit(const ir::operation::ExpandDims &op) override; void visit(const ir::operation::Fill &op) override; void visit(const ir::operation::FullyConnected &op) override; void visit(const ir::operation::FusedBatchNorm &op) override; void visit(const ir::operation::Gather &op) override; - void visit(const ir::operation::Log &op) override; - void visit(const ir::operation::LogicalNot &op) override; - void visit(const ir::operation::LogicalOr &op) override; - void visit(const ir::operation::Logistic &op) override; void visit(const ir::operation::L2Normalization &op) override; void visit(const ir::operation::MatrixBandPart &op) override; - void visit(const ir::operation::Max &op) override; - void visit(const ir::operation::Min &op) override; - void visit(const ir::operation::Mul &op) override; - void visit(const ir::operation::Neg &op) override; void visit(const ir::operation::OneHot &op) override; void visit(const ir::operation::Pack &op) override; void visit(const ir::operation::Pad &op) override; @@ -87,27 +75,21 @@ public: void visit(const ir::operation::Range &op) override; void visit(const ir::operation::Reduce &op) override; void visit(const ir::operation::Reshape &op) override; - void visit(const ir::operation::Round &op) override; - void 
visit(const ir::operation::RSQRT &op) override; void visit(const ir::operation::ResizeBilinear &op) override; void visit(const ir::operation::Reverse &op) override; void visit(const ir::operation::Select &op) override; void visit(const ir::operation::Shape &op) override; - void visit(const ir::operation::Sin &op) override; void visit(const ir::operation::Slice &op) override; void visit(const ir::operation::Softmax &op) override; void visit(const ir::operation::SpaceToBatchND &op) override; void visit(const ir::operation::Split &op) override; void visit(const ir::operation::Squeeze &op) override; void visit(const ir::operation::StridedSlice &op) override; - void visit(const ir::operation::Sub &op) override; void visit(const ir::operation::SquaredDifference &op) override; - void visit(const ir::operation::Tanh &op) override; void visit(const ir::operation::Tile &op) override; void visit(const ir::operation::Transpose &op) override; void visit(const ir::operation::Unpack &op) override; // TODO write op starting from V - void visit(const ir::operation::ZerosLike &op) override; private: /** @@ -127,11 +109,6 @@ private: */ const ir::Operands &_operands; /** - * @brief To allocate memory for output tensor if needed - */ - // TODO Remove this, as it is no longer used - backend::IDynamicTensorManager *_dynamic_tensor_manager; - /** * @brief To get tensor object and access tensor-level info, e.g., ITensor::buffer() */ std::shared_ptr<backend::ITensorRegistry> _tensor_registry; diff --git a/runtime/onert/core/include/exec/IExecutor.h b/runtime/onert/core/include/exec/IExecutor.h index 46e05a289..6c8bab67c 100644 --- a/runtime/onert/core/include/exec/IExecutor.h +++ b/runtime/onert/core/include/exec/IExecutor.h @@ -80,8 +80,6 @@ struct DynAllocInfo { /// @brief index of input tensor whose memory needs to be allocated at execution time ir::OperandIndex ind; - /// @brief dynamic tensor manager that can allocate memory when input tensor is dynamic - backend::IDynamicTensorManager *dyn_tensor_manager; }; using DynAllocInfoMap = std::unordered_map<std::shared_ptr<backend::ITensor>, DynAllocInfo>; diff --git a/runtime/onert/core/include/exec/IODescription.h b/runtime/onert/core/include/exec/IODescription.h index c10c36756..d1810ec3b 100644 --- a/runtime/onert/core/include/exec/IODescription.h +++ b/runtime/onert/core/include/exec/IODescription.h @@ -62,8 +62,8 @@ struct IODescription { std::vector<std::unique_ptr<InputDesc>> inputs; std::vector<std::unique_ptr<OutputDesc>> outputs; - // Contains shape of input set by set_input_tensorinfo - std::unordered_map<ir::IOIndex, ir::Shape> input_shape_signature; + // Contains shape of input set by nnfw_set_input_tensorinfo(..) 
+ std::unordered_map<ir::IOIndex, ir::Shape> dynamic_input_shapes; }; } // namespace exec diff --git a/runtime/onert/core/include/ir/Graph.h b/runtime/onert/core/include/ir/Graph.h index fb956fedf..2103e6e64 100644 --- a/runtime/onert/core/include/ir/Graph.h +++ b/runtime/onert/core/include/ir/Graph.h @@ -60,8 +60,8 @@ public: OperandIndex addOperand(const Shape &shape, const TypeInfo &type); OperationIndex addOperation(std::unique_ptr<Operation> &&node); void setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data); - void addInput(const OperandIndex &ind); - void addOutput(const OperandIndex &ind); + void addInput(const OperandIndex &ind, const std::string &name = ""); + void addOutput(const OperandIndex &ind, const std::string &name = ""); void finishBuilding(void); void removeOperand(const OperandIndex &ind) { _operands.remove(ind); } bool isBuildingPhase(void) const { return _phase == Phase::BUILDING; } @@ -94,6 +94,8 @@ public: OperandIndexSequence &getInputs() { return _inputs; } const OperandIndexSequence &getOutputs() const { return _outputs; } OperandIndexSequence &getOutputs() { return _outputs; } + IOIndex getInputIndex(const std::string &name) const; + IOIndex getOutputIndex(const std::string &name) const; const Operands &operands() const { return _operands; } Operands &operands() { return _operands; } // TODO Remove this non-const accessor const Operations &operations() const { return _operations; } @@ -108,6 +110,8 @@ private: Operands _operands; OperandIndexSequence _inputs; OperandIndexSequence _outputs; + std::unordered_map<std::string, IOIndex> _name_to_input; + std::unordered_map<std::string, IOIndex> _name_to_output; // Child subgraphs std::shared_ptr<Subgraphs> _subgraphs; // TFLite and circle's default layout is NHWC; diff --git a/runtime/onert/core/include/ir/InternalType.h b/runtime/onert/core/include/ir/InternalType.h index e42db72cf..1d962c185 100644 --- a/runtime/onert/core/include/ir/InternalType.h +++ b/runtime/onert/core/include/ir/InternalType.h @@ -40,6 +40,12 @@ struct Stride uint32_t horizontal; }; +struct Dilation +{ + uint32_t width_factor; + uint32_t height_factor; +}; + } // namespace ir } // namespace onert diff --git a/runtime/onert/core/include/ir/LoweredGraph.h b/runtime/onert/core/include/ir/LoweredGraph.h deleted file mode 100644 index d6583df24..000000000 --- a/runtime/onert/core/include/ir/LoweredGraph.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_LOWERED_GRAPH_H__ -#define __ONERT_IR_LOWERED_GRAPH_H__ - -#include "ir/Graph.h" -#include "ir/LowerInfoMap.h" -#include "ir/OpSequences.h" -#include "compiler/BackendResolver.h" -#include "compiler/Compiler.h" - -namespace onert -{ -namespace ir -{ - -/** - * @brief Class that contains lowering information on graph. 
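The _name_to_input/_name_to_output maps added to Graph above back these lookups. A hypothetical sketch of registering and resolving a named input (the operand, shape, type, and the name "input_ids" are invented):

    onert::ir::Graph graph;
    auto operand = graph.addOperand(shape, type); // shape/type built elsewhere
    graph.addInput(operand, "input_ids");         // name recorded for lookup
    onert::ir::IOIndex idx = graph.getInputIndex("input_ids");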
- * In addition, after lowering, operands in graph will be set to "dynamic" - * if the shape of output of an operation cannot be decided at compilation time. - */ -class LoweredGraph -{ -public: - LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options); - - Graph &graph() { return _graph; } - const Graph &graph() const { return _graph; } - const LowerInfoMap *getLowerInfo() const { return &_lower_info_map; } - const operation::LowerInfo *getLowerInfo(const OpSequenceIndex &op_seq_index) const; - void setLowerInfo(const OpSequenceIndex &op_seq_index, - std::unique_ptr<operation::LowerInfo> &&lower_info); - void removeLowerInfo(const OpSequenceIndex &op_seq_index); - const operand::LowerInfo *getLowerInfo(const OperandIndex &index) const; - operand::LowerInfo *getLowerInfo(const OperandIndex &index); - void setLowerInfo(const OperandIndex &index, std::unique_ptr<operand::LowerInfo> &&lower_info); - void removeLowerInfo(const OperandIndex &index); - OpSequences &op_seqs() { return _op_seqs; } - const OpSequences &op_seqs() const { return _op_seqs; } - void iterateTopolOpSeqs( - const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const; - void iterateTopolOpSeqs(const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn); - const backend::BackendContexts &backend_contexts() { return _backend_contexts; } - const backend::BackendContexts &backend_contexts() const { return _backend_contexts; } - std::shared_ptr<ir::OperationIndexMap<int64_t>> indexed_ranks() { return _indexed_ranks; } - -private: - void makeOpSequences(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info, - const compiler::CompilerOptions &options, - const compiler::BackendResolver &backend_resolver); - - void - manipulateLowerInfo(OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info, - bool is_primary); - void dumpLowerInfo(); - bool mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index, - Layout layout, const compiler::BackendResolver &backend_resolver); - OpSequenceIndex appendFreshSingleOpSequence(const OperationIndex &node_index, - const Operation &node); - -private: - Graph _graph; - backend::BackendContexts _backend_contexts; - std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; - LowerInfoMap _lower_info_map; - // Pass(for Perm) can accept only graph so that Graph has OpSequences as a member - OpSequences _op_seqs; -}; - -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_LOWERED_GRAPH_H__ diff --git a/runtime/onert/core/include/ir/OpSequences.h b/runtime/onert/core/include/ir/OpSequences.h index 6ed8499bc..ab258f395 100644 --- a/runtime/onert/core/include/ir/OpSequences.h +++ b/runtime/onert/core/include/ir/OpSequences.h @@ -63,13 +63,6 @@ public: */ OpSequenceIndex getOperation(const OperationIndex &operation_index) const; /** - * @brief Dump OpSequences - * - * @param msg Message that will be displayed - * @param graph Graph that has information used for dump - */ - void dump(const std::string &msg, const Operations &operations) const; - /** * @brief Remove an operation from OpSequence * * @param operation_index Operation index to be removed @@ -84,6 +77,14 @@ private: mutable std::unordered_map<OperationIndex, OpSequenceIndex> _seq_indexes; }; +/** + * @brief Dump OpSequences + * + * @param op_seqs Operation Sequences + * @param operations Operation context + */ +void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations); + } // namespace ir } // 
namespace onert diff --git a/runtime/onert/core/include/ir/Operations.Include.h b/runtime/onert/core/include/ir/Operations.Include.h index 30c4ff25a..17bbbc29c 100644 --- a/runtime/onert/core/include/ir/Operations.Include.h +++ b/runtime/onert/core/include/ir/Operations.Include.h @@ -17,10 +17,10 @@ // This file has no ifdef guard intentionally #include "ir/operation/BatchToSpaceND.h" +#include "ir/operation/BinaryArithmetic.h" #include "ir/operation/BroadcastTo.h" #include "ir/operation/Conv2D.h" -#include "ir/operation/MaxPool2D.h" -#include "ir/operation/AvgPool2D.h" +#include "ir/operation/Pool2D.h" #include "ir/operation/Concat.h" #include "ir/operation/Reshape.h" #include "ir/operation/Fill.h" @@ -29,51 +29,32 @@ #include "ir/operation/Transpose.h" #include "ir/operation/Permute.h" #include "ir/operation/Reduce.h" -#include "ir/operation/Add.h" -#include "ir/operation/Sub.h" #include "ir/operation/DepthwiseConv2D.h" #include "ir/operation/Slice.h" #include "ir/operation/StridedSlice.h" -#include "ir/operation/Mul.h" #include "ir/operation/Squeeze.h" -#include "ir/operation/Tanh.h" -#include "ir/operation/Log.h" -#include "ir/operation/Logistic.h" -#include "ir/operation/Cast.h" -#include "ir/operation/Div.h" -#include "ir/operation/Exp.h" +#include "ir/operation/ElementwiseActivation.h" +#include "ir/operation/ElementwiseBinary.h" +#include "ir/operation/ElementwiseUnary.h" #include "ir/operation/ExpandDims.h" #include "ir/operation/Comparison.h" -#include "ir/operation/LogicalAnd.h" -#include "ir/operation/LogicalOr.h" -#include "ir/operation/LogicalNot.h" #include "ir/operation/LSTM.h" -#include "ir/operation/RSQRT.h" -#include "ir/operation/ReLU.h" #include "ir/operation/ResizeBilinear.h" -#include "ir/operation/ReLU1.h" -#include "ir/operation/ReLU6.h" +#include "ir/operation/ResizeNearestNeighbor.h" #include "ir/operation/Reverse.h" #include "ir/operation/RNN.h" -#include "ir/operation/Round.h" -#include "ir/operation/Floor.h" #include "ir/operation/SpaceToBatchND.h" #include "ir/operation/SpaceToDepth.h" -#include "ir/operation/L2Pool2D.h" #include "ir/operation/EmbeddingLookup.h" #include "ir/operation/L2Normalization.h" #include "ir/operation/HashtableLookup.h" #include "ir/operation/InstanceNorm.h" #include "ir/operation/PReLU.h" #include "ir/operation/TransposeConv.h" -#include "ir/operation/SQRT.h" #include "ir/operation/SquaredDifference.h" #include "ir/operation/TopKV2.h" #include "ir/operation/Gather.h" -#include "ir/operation/Neg.h" -#include "ir/operation/Abs.h" #include "ir/operation/ArgMax.h" -#include "ir/operation/Dequantize.h" #include "ir/operation/LocalResponseNormalization.h" #include "ir/operation/DepthToSpace.h" #include "ir/operation/Pack.h" @@ -82,27 +63,22 @@ #include "ir/operation/SplitV.h" #include "ir/operation/Unpack.h" #include "ir/operation/Pad.h" -#include "ir/operation/Min.h" -#include "ir/operation/Max.h" #include "ir/operation/Custom.h" #include "ir/operation/Einsum.h" #include "ir/operation/OneHot.h" -#include "ir/operation/Cos.h" -#include "ir/operation/Sin.h" #include "ir/operation/Shape.h" #include "ir/operation/ConvertFp32ToFp16.h" #include "ir/operation/ConvertFp16ToFp32.h" #include "ir/operation/If.h" #include "ir/operation/While.h" #include "ir/operation/Pow.h" -#include "ir/operation/ZerosLike.h" #include "ir/operation/Tile.h" #include "ir/operation/Range.h" +#include "ir/operation/Rank.h" #include "ir/operation/BCQFullyConnected.h" #include "ir/operation/BCQGather.h" #include "ir/operation/MatrixBandPart.h" #include 
"ir/operation/BatchMatMul.h" #include "ir/operation/FusedBatchNorm.h" #include "ir/operation/LogSoftmax.h" -#include "ir/operation/Quantize.h" #include "ir/operation/StatelessRandomUniform.h" diff --git a/runtime/onert/core/include/ir/Operations.lst b/runtime/onert/core/include/ir/Operations.lst index 75c6d8221..ab2146821 100644 --- a/runtime/onert/core/include/ir/Operations.lst +++ b/runtime/onert/core/include/ir/Operations.lst @@ -19,62 +19,44 @@ #endif // Internal Name -OP(Add) -OP(Sub) OP(BatchToSpaceND) +OP(BinaryArithmetic) OP(BroadcastTo) -OP(Cast) OP(Conv2D) OP(DepthwiseConv2D) -OP(AvgPool2D) -OP(MaxPool2D) +OP(Pool2D) OP(Concat) OP(Fill) OP(FullyConnected) OP(Reduce) OP(Reshape) -OP(Mul) OP(Softmax) OP(Squeeze) OP(Slice) OP(StridedSlice) -OP(Tanh) -OP(Logistic) -OP(Div) OP(Transpose) -OP(Exp) +OP(ElementwiseActivation) +OP(ElementwiseBinary) +OP(ElementwiseUnary) OP(ExpandDims) OP(Comparison) -OP(LogicalAnd) -OP(LogicalOr) -OP(LogicalNot) OP(LSTM) -OP(RSQRT) -OP(ReLU) OP(ResizeBilinear) -OP(ReLU1) -OP(ReLU6) +OP(ResizeNearestNeighbor) OP(Reverse) OP(RNN) -OP(Round) -OP(Floor) OP(SpaceToBatchND) OP(SpaceToDepth) -OP(L2Pool2D) OP(EmbeddingLookup) OP(L2Normalization) OP(HashtableLookup) OP(InstanceNorm) OP(PReLU) OP(TransposeConv) -OP(SQRT) OP(SquaredDifference) OP(TopKV2) OP(Gather) -OP(Neg) -OP(Abs) OP(ArgMax) -OP(Dequantize) OP(Einsum) OP(LocalResponseNormalization) OP(DepthToSpace) @@ -86,26 +68,20 @@ OP(Unpack) OP(Pad) OP(Custom) OP(Permute) -OP(Min) -OP(Max) OP(OneHot) -OP(Cos) -OP(Sin) OP(Shape) OP(ConvertFp32ToFp16) OP(ConvertFp16ToFp32) OP(If) OP(While) -OP(Log) OP(Pow) -OP(ZerosLike) OP(Tile) OP(Range) +OP(Rank) OP(BCQFullyConnected) OP(BCQGather) OP(MatrixBandPart) OP(BatchMatMul) OP(FusedBatchNorm) OP(LogSoftmax) -OP(Quantize) OP(StatelessRandomUniform) diff --git a/runtime/onert/core/include/ir/Padding.h b/runtime/onert/core/include/ir/Padding.h index b9053914d..8a7bcdbeb 100644 --- a/runtime/onert/core/include/ir/Padding.h +++ b/runtime/onert/core/include/ir/Padding.h @@ -65,7 +65,8 @@ struct Padding // TODO Change to Padding struct's method const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape, const FeatureShape &ofm_shape, const Stride &stride, - uint32_t kw, uint32_t kh); + uint32_t kw, uint32_t kh, uint32_t dwf = 1, + uint32_t dhf = 1); } // namespace ir } // namespace onert diff --git a/runtime/onert/core/include/ir/operation/Abs.h b/runtime/onert/core/include/ir/operation/Abs.h deleted file mode 100644 index 9126c0027..000000000 --- a/runtime/onert/core/include/ir/operation/Abs.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_ABS_H__ -#define __ONERT_IR_OPERATION_ABS_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Abs : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Abs; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_ABS_H__ diff --git a/runtime/onert/core/include/ir/operation/Add.h b/runtime/onert/core/include/ir/operation/BinaryArithmetic.h index 5f5f4e0fe..110fff565 100644 --- a/runtime/onert/core/include/ir/operation/Add.h +++ b/runtime/onert/core/include/ir/operation/BinaryArithmetic.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_IR_OPERATION_ADD_H__ -#define __ONERT_IR_OPERATION_ADD_H__ +#ifndef __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__ +#define __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__ #include "ir/Operation.h" #include "ir/InternalType.h" @@ -27,7 +27,7 @@ namespace ir namespace operation { -class Add : public Operation +class BinaryArithmetic final : public Operation { public: enum Input @@ -36,17 +36,28 @@ public: RHS }; + enum class ArithmeticType + { + ADD, + SUB, + MUL, + DIV + }; + struct Param { + ArithmeticType arithmetic_type; Activation activation; }; public: - Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); + BinaryArithmetic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Add; } + std::string name() const override; + OpCode opcode() const final { return OpCode::BinaryArithmetic; } public: const Param &param() const { return _param; } @@ -59,4 +70,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_ADD_H__ +#endif // __ONERT_IR_OPERATION_BINARY_ARITHMETIC_H__ diff --git a/runtime/onert/core/include/ir/operation/BroadcastTo.h b/runtime/onert/core/include/ir/operation/BroadcastTo.h index 98906adc2..06c033497 100644 --- a/runtime/onert/core/include/ir/operation/BroadcastTo.h +++ b/runtime/onert/core/include/ir/operation/BroadcastTo.h @@ -42,7 +42,7 @@ public: public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Cast; } + OpCode opcode() const final { return OpCode::BroadcastTo; } }; } // namespace operation diff --git a/runtime/onert/core/include/ir/operation/Cast.h b/runtime/onert/core/include/ir/operation/Cast.h deleted file mode 100644 index 6fb8c105b..000000000 --- a/runtime/onert/core/include/ir/operation/Cast.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License.
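With the consolidation above, a former Add node is expressed through the new parameter block; a minimal sketch (the inputs/outputs OperandIndexSequence values and Activation::NONE are assumed from context):

    using onert::ir::operation::BinaryArithmetic;
    BinaryArithmetic::Param param;
    param.arithmetic_type = BinaryArithmetic::ArithmeticType::ADD; // old Add node
    param.activation = onert::ir::Activation::NONE;
    BinaryArithmetic node{inputs, outputs, param}; // inputs = {LHS, RHS}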
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_CAST_H__ -#define __ONERT_IR_OPERATION_CAST_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Cast : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Cast; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_CAST_H__ diff --git a/runtime/onert/core/include/ir/operation/Conv2D.h b/runtime/onert/core/include/ir/operation/Conv2D.h index e23bf3eb3..d8c7b671b 100644 --- a/runtime/onert/core/include/ir/operation/Conv2D.h +++ b/runtime/onert/core/include/ir/operation/Conv2D.h @@ -45,6 +45,7 @@ public: Stride stride; Padding padding; Activation activation; + Dilation dilation; }; public: diff --git a/runtime/onert/core/include/ir/operation/Cos.h b/runtime/onert/core/include/ir/operation/Cos.h deleted file mode 100644 index a6d7851bd..000000000 --- a/runtime/onert/core/include/ir/operation/Cos.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_COS_H__ -#define __ONERT_IR_OPERATION_COS_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Cos : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Cos; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_COS_H__ diff --git a/runtime/onert/core/include/ir/operation/Dequantize.h b/runtime/onert/core/include/ir/operation/Dequantize.h deleted file mode 100644 index 97a08b33c..000000000 --- a/runtime/onert/core/include/ir/operation/Dequantize.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
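The Dilation field added to Conv2D::Param above makes dilated convolution expressible in the IR; a sketch with invented values (a default dilation of {1, 1} elsewhere is an assumption):

    onert::ir::operation::Conv2D::Param p;
    p.dilation = onert::ir::Dilation{2, 2}; // width_factor, height_factor
    p.activation = onert::ir::Activation::NONE;
    // stride and padding are filled in exactly as before this change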
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_DEQUANTIZE_H__ -#define __ONERT_IR_OPERATION_DEQUANTIZE_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Dequantize : public Operation -{ -public: - enum Input - { - INPUT = 0, - }; - -public: - Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Dequantize; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_DEQUANTIZE_H__ diff --git a/runtime/onert/core/include/ir/operation/Einsum.h b/runtime/onert/core/include/ir/operation/Einsum.h index a3426ccbc..9892c24b8 100644 --- a/runtime/onert/core/include/ir/operation/Einsum.h +++ b/runtime/onert/core/include/ir/operation/Einsum.h @@ -41,7 +41,7 @@ public: public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Add; } + OpCode opcode() const final { return OpCode::Einsum; } public: const Param &param() const { return _param; } diff --git a/runtime/onert/core/include/ir/operation/Div.h b/runtime/onert/core/include/ir/operation/ElementwiseActivation.h index a7ec1c465..b2a1d3d2d 100644 --- a/runtime/onert/core/include/ir/operation/Div.h +++ b/runtime/onert/core/include/ir/operation/ElementwiseActivation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,11 +14,10 @@ * limitations under the License.
*/ -#ifndef __ONERT_IR_OPERATION_DIV_H__ -#define __ONERT_IR_OPERATION_DIV_H__ +#ifndef __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__ +#define __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__ #include "ir/Operation.h" -#include "ir/InternalType.h" namespace onert { @@ -27,30 +26,46 @@ namespace ir namespace operation { -class Div : public Operation +class ElementwiseActivation : public Operation { public: enum Input { - LHS = 0, - RHS + INPUT = 0 + }; + + enum class Type + { + ELU, + LOGISTIC, + RELU, + TANH, + LEAKY_RELU }; struct Param { - Activation activation; + Type op_type; + float alpha; + float beta; + Param() : op_type(Type::ELU), alpha(0.0f), beta(0.0f) {} }; public: - Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); + ElementwiseActivation(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Div; } + std::string name() const override; + OpCode opcode() const final { return OpCode::ElementwiseActivation; } public: const Param &param() const { return _param; } +public: + static float infinity; + private: Param _param; }; @@ -59,4 +74,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_DIV_H__ +#endif // __ONERT_IR_OPERATION_ELEMENTWISE_ACTIVATION_H__ diff --git a/runtime/onert/core/include/ir/operation/Mul.h b/runtime/onert/core/include/ir/operation/ElementwiseBinary.h index 0f01b0ecf..dd07f6058 100644 --- a/runtime/onert/core/include/ir/operation/Mul.h +++ b/runtime/onert/core/include/ir/operation/ElementwiseBinary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,11 +14,10 @@ * limitations under the License.
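A former Logistic (or Tanh, ReLU, ...) node maps onto the new Type enum above; a minimal sketch (the per-type meaning of alpha/beta is an assumption, as are inputs/outputs):

    using onert::ir::operation::ElementwiseActivation;
    ElementwiseActivation::Param p;
    p.op_type = ElementwiseActivation::Type::LOGISTIC; // old Logistic node
    ElementwiseActivation node{inputs, outputs, p};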
*/ -#ifndef __ONERT_IR_OPERATION_MUL_H__ -#define __ONERT_IR_OPERATION_MUL_H__ +#ifndef __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__ +#define __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__ #include "ir/Operation.h" -#include "ir/InternalType.h" namespace onert { @@ -27,7 +26,7 @@ namespace ir namespace operation { -class Mul : public Operation +class ElementwiseBinary : public Operation { public: enum Input @@ -36,17 +35,27 @@ public: RHS }; + enum class ElementwiseBinaryType + { + LOGICAL_AND, + LOGICAL_OR, + MAX, + MIN + }; + struct Param { - Activation activation; + ElementwiseBinaryType op_type; }; public: - Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); + ElementwiseBinary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Mul; } + std::string name() const override; + OpCode opcode() const final { return OpCode::ElementwiseBinary; } public: const Param &param() const { return _param; } @@ -59,4 +68,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_MUL_H__ +#endif // __ONERT_IR_OPERATION_ELEMENTWISEBINARY_H__ diff --git a/runtime/onert/core/include/ir/operation/MaxPool2D.h b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h index 300f7cb3c..c40778a56 100644 --- a/runtime/onert/core/include/ir/operation/MaxPool2D.h +++ b/runtime/onert/core/include/ir/operation/ElementwiseUnary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,10 @@ * limitations under the License.
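Likewise for the two-input logical/min/max ops; a sketch of the old LogicalAnd expressed as the consolidated node (inputs/outputs assumed built elsewhere):

    using onert::ir::operation::ElementwiseBinary;
    ElementwiseBinary::Param p;
    p.op_type = ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND;
    ElementwiseBinary node{inputs, outputs, p}; // inputs = {LHS, RHS}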
*/ -#ifndef __ONERT_IR_OPERATION_MAXPOOL2D_H__ -#define __ONERT_IR_OPERATION_MAXPOOL2D_H__ - -#include <memory> +#ifndef __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ +#define __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ #include "ir/Operation.h" -#include "ir/InternalType.h" -#include "ir/Padding.h" namespace onert { @@ -30,7 +26,7 @@ namespace ir namespace operation { -class MaxPool2D : public Operation +class ElementwiseUnary : public Operation { public: enum Input @@ -38,22 +34,40 @@ public: INPUT = 0 }; + enum class Type + { + ABS, + CAST, + COS, + DEQUANTIZE, + ERF, + EXP, + FLOOR, + LOG, + LOGICAL_NOT, + NEG, + QUANTIZE, + ROUND, + RSQRT, + SIN, + SQRT, + SQURE, + ZEROS_LIKE + }; + struct Param { - uint32_t kh; - uint32_t kw; - Stride stride; - Padding padding; - Activation activation; + Type op_type; }; public: - MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param); + ElementwiseUnary(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::MaxPool2D; } + std::string name() const override; + OpCode opcode() const final { return OpCode::ElementwiseUnary; } public: const Param &param() const { return _param; } @@ -66,4 +80,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_MAXPOOL2D_H__ +#endif // __ONERT_IR_OPERATION_ELEMENTWISEUNARY_H__ diff --git a/runtime/onert/core/include/ir/operation/Exp.h b/runtime/onert/core/include/ir/operation/Exp.h deleted file mode 100644 index 2e68ff07a..000000000 --- a/runtime/onert/core/include/ir/operation/Exp.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_EXP_H__ -#define __ONERT_IR_OPERATION_EXP_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Exp : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Exp; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_EXP_H__ diff --git a/runtime/onert/core/include/ir/operation/Floor.h b/runtime/onert/core/include/ir/operation/Floor.h deleted file mode 100644 index b34699c22..000000000 --- a/runtime/onert/core/include/ir/operation/Floor.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License.
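Each deleted single-input op (Abs, Cast, Cos, Exp, Floor, ...) becomes a Type value of ElementwiseUnary above; a minimal sketch for the old Abs node (inputs/outputs assumed):

    using onert::ir::operation::ElementwiseUnary;
    ElementwiseUnary::Param p;
    p.op_type = ElementwiseUnary::Type::ABS;
    ElementwiseUnary node{inputs, outputs, p};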
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_FLOOR_H__ -#define __ONERT_IR_OPERATION_FLOOR_H__ - -#include <memory> - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Floor : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Floor; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_FLOOR_H__ diff --git a/runtime/onert/core/include/ir/operation/Log.h b/runtime/onert/core/include/ir/operation/Log.h deleted file mode 100644 index a6e3ca3f6..000000000 --- a/runtime/onert/core/include/ir/operation/Log.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_LOG_H__ -#define __ONERT_IR_OPERATION_LOG_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Log : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Log; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_LOG_H__ diff --git a/runtime/onert/core/include/ir/operation/LogicalAnd.h b/runtime/onert/core/include/ir/operation/LogicalAnd.h deleted file mode 100644 index dc853b6a9..000000000 --- a/runtime/onert/core/include/ir/operation/LogicalAnd.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_LOGICAL_AND_H__ -#define __ONERT_IR_OPERATION_LOGICAL_AND_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class LogicalAnd : public Operation -{ -public: - enum Input - { - INPUT0 = 0, - INPUT1 = 1, - }; - -public: - LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::LogicalAnd; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_LOGICAL_AND_H__ diff --git a/runtime/onert/core/include/ir/operation/LogicalNot.h b/runtime/onert/core/include/ir/operation/LogicalNot.h deleted file mode 100644 index 9519f6d47..000000000 --- a/runtime/onert/core/include/ir/operation/LogicalNot.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_LOGICAL_NOT_H__ -#define __ONERT_IR_OPERATION_LOGICAL_NOT_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class LogicalNot : public Operation -{ -public: - enum Input - { - INPUT = 0, - }; - -public: - LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::LogicalNot; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_LOGICAL_NOT_H__ diff --git a/runtime/onert/core/include/ir/operation/LogicalOr.h b/runtime/onert/core/include/ir/operation/LogicalOr.h deleted file mode 100644 index c4b658cd9..000000000 --- a/runtime/onert/core/include/ir/operation/LogicalOr.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_LOGICAL_OR_H__ -#define __ONERT_IR_OPERATION_LOGICAL_OR_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class LogicalOr : public Operation -{ -public: - enum Input - { - INPUT0 = 0, - INPUT1 = 1, - }; - -public: - LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::LogicalOr; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_LOGICAL_OR_H__ diff --git a/runtime/onert/core/include/ir/operation/Logistic.h b/runtime/onert/core/include/ir/operation/Logistic.h deleted file mode 100644 index 5421e1c84..000000000 --- a/runtime/onert/core/include/ir/operation/Logistic.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_LOGISTIC_H__ -#define __ONERT_IR_OPERATION_LOGISTIC_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Logistic : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Logistic; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_LOGISTIC_H__ diff --git a/runtime/onert/core/include/ir/operation/Max.h b/runtime/onert/core/include/ir/operation/Max.h deleted file mode 100644 index df72d3ae9..000000000 --- a/runtime/onert/core/include/ir/operation/Max.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_MAX_H__ -#define __ONERT_IR_OPERATION_MAX_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Max : public Operation -{ -public: - enum Input - { - LHS = 0, - RHS - }; - -public: - Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Max; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_MAX_H__ diff --git a/runtime/onert/core/include/ir/operation/Mean.h b/runtime/onert/core/include/ir/operation/Mean.h deleted file mode 100644 index ce2da908d..000000000 --- a/runtime/onert/core/include/ir/operation/Mean.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_MEAN_H__ -#define __ONERT_IR_OPERATION_MEAN_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Mean : public Operation -{ -public: - enum Input - { - INPUT, - AXES - }; - - struct Param - { - bool keep_dims; - }; - -public: - Mean(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Mean; } - -public: - const Param &param() const { return _param; } - -private: - Param _param; -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_MEAN_H__ diff --git a/runtime/onert/core/include/ir/operation/Min.h b/runtime/onert/core/include/ir/operation/Min.h deleted file mode 100644 index 117301c00..000000000 --- a/runtime/onert/core/include/ir/operation/Min.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
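Mean itself does not vanish from models; it is presumably absorbed by the generic Reduce op kept in Operations.lst. A hedged sketch, assuming Reduce::Param carries a ReduceType and keep_dims:

    onert::ir::operation::Reduce::Param p;
    p.reduce_type = onert::ir::operation::Reduce::ReduceType::MEAN; // assumed enum value
    p.keep_dims = false;
    onert::ir::operation::Reduce node{inputs, outputs, p}; // inputs = {INPUT, AXES}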
- */ - -#ifndef __ONERT_IR_OPERATION_MIN_H__ -#define __ONERT_IR_OPERATION_MIN_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Min : public Operation -{ -public: - enum Input - { - LHS = 0, - RHS - }; - -public: - Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Min; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_MIN_H__ diff --git a/runtime/onert/core/include/ir/operation/Neg.h b/runtime/onert/core/include/ir/operation/Neg.h deleted file mode 100644 index f8123c485..000000000 --- a/runtime/onert/core/include/ir/operation/Neg.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_NEG_H__ -#define __ONERT_IR_OPERATION_NEG_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Neg : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Neg; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_NEG_H__ diff --git a/runtime/onert/core/include/ir/operation/AvgPool2D.h b/runtime/onert/core/include/ir/operation/Pool2D.h index d5b300a35..22425b4c2 100644 --- a/runtime/onert/core/include/ir/operation/AvgPool2D.h +++ b/runtime/onert/core/include/ir/operation/Pool2D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef __ONERT_IR_OPERATION_AVGPOOL2D_H__ -#define __ONERT_IR_OPERATION_AVGPOOL2D_H__ +#ifndef __ONERT_IR_OPERATION_POOL2D_H__ +#define __ONERT_IR_OPERATION_POOL2D_H__ #include <memory> @@ -30,7 +30,7 @@ namespace ir namespace operation { -class AvgPool2D : public Operation +class Pool2D : public Operation { public: enum Input @@ -38,23 +38,31 @@ public: INPUT = 0 }; + enum class PoolType + { + AVG, + L2, + MAX, + }; + struct Param { + PoolType op_type; uint32_t kh; uint32_t kw; - Stride stride; Padding padding; Activation activation; }; public: - AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param); + Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::AvgPool2D; } + std::string name() const override; + OpCode opcode() const final { return OpCode::Pool2D; } public: const Param &param() const { return _param; } @@ -67,4 +75,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_AVGPOOL2D_H__ +#endif // __ONERT_IR_OPERATION_POOL2D_H__ diff --git a/runtime/onert/core/include/ir/operation/Quantize.h b/runtime/onert/core/include/ir/operation/Quantize.h deleted file mode 100644 index 2533ce432..000000000 --- a/runtime/onert/core/include/ir/operation/Quantize.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_QUANTIZE_H__ -#define __ONERT_IR_OPERATION_QUANTIZE_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Quantize : public Operation -{ -public: - enum Input - { - INPUT = 0, - }; - -public: - Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Quantize; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_QUANTIZE_H__ diff --git a/runtime/onert/core/include/ir/operation/RSQRT.h b/runtime/onert/core/include/ir/operation/RSQRT.h deleted file mode 100644 index 64bb4f10a..000000000 --- a/runtime/onert/core/include/ir/operation/RSQRT.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
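The old AvgPool2D/MaxPool2D/L2Pool2D trio collapses into the Pool2D op above; a sketch of a 2x2 max pool (whether Param still carries a Stride field as the old headers did is an assumption here):

    using onert::ir::operation::Pool2D;
    Pool2D::Param p;
    p.op_type = Pool2D::PoolType::MAX; // old MaxPool2D
    p.kh = 2;
    p.kw = 2;
    p.activation = onert::ir::Activation::NONE;
    Pool2D node{inputs, outputs, p};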
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_RSQRT_H__ -#define __ONERT_IR_OPERATION_RSQRT_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class RSQRT : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::RSQRT; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_RSQRT_H__ diff --git a/runtime/onert/core/include/ir/operation/Round.h b/runtime/onert/core/include/ir/operation/Rank.h index 44af0d861..2fd24ce23 100644 --- a/runtime/onert/core/include/ir/operation/Round.h +++ b/runtime/onert/core/include/ir/operation/Rank.h @@ -14,8 +14,10 @@ * limitations under the License. */ -#ifndef __ONERT_IR_OPERATION_ROUND_H__ -#define __ONERT_IR_OPERATION_ROUND_H__ +#ifndef __ONERT_IR_OPERATION_RANK_H__ +#define __ONERT_IR_OPERATION_RANK_H__ + +#include <memory> #include "ir/Operation.h" @@ -26,7 +28,7 @@ namespace ir namespace operation { -class Round : public Operation +class Rank : public Operation { public: enum Input @@ -35,15 +37,15 @@ public: }; public: - Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); + Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Round; } + OpCode opcode() const final { return OpCode::Rank; } }; } // namespace operation } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_ROUND_H__ +#endif // __ONERT_IR_OPERATION_RANK_H__ diff --git a/runtime/onert/core/include/ir/operation/ReLU.h b/runtime/onert/core/include/ir/operation/ReLU.h deleted file mode 100644 index 9eb0c091b..000000000 --- a/runtime/onert/core/include/ir/operation/ReLU.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_RELU_H__ -#define __ONERT_IR_OPERATION_RELU_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class ReLU : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::ReLU; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_RELU_H__ diff --git a/runtime/onert/core/include/ir/operation/ReLU1.h b/runtime/onert/core/include/ir/operation/ReLU1.h deleted file mode 100644 index 134ee573a..000000000 --- a/runtime/onert/core/include/ir/operation/ReLU1.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_ReLU1_H__ -#define __ONERT_IR_OPERATION_ReLU1_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class ReLU1 : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::ReLU1; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_ReLU1_H__ diff --git a/runtime/onert/core/include/ir/operation/ReLU6.h b/runtime/onert/core/include/ir/operation/ReLU6.h deleted file mode 100644 index e658c4925..000000000 --- a/runtime/onert/core/include/ir/operation/ReLU6.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_ReLU6_H__ -#define __ONERT_IR_OPERATION_ReLU6_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class ReLU6 : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::ReLU6; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_ReLU6_H__ diff --git a/runtime/onert/core/include/ir/operation/L2Pool2D.h b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h index d369fd5fc..e4d810eeb 100644 --- a/runtime/onert/core/include/ir/operation/L2Pool2D.h +++ b/runtime/onert/core/include/ir/operation/ResizeNearestNeighbor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,12 @@ * limitations under the License. */ -#ifndef __ONERT_IR_OPERATION_L2_POOL_2D_H__ -#define __ONERT_IR_OPERATION_L2_POOL_2D_H__ +#ifndef __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__ +#define __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__ #include <memory> #include "ir/Operation.h" -#include "ir/InternalType.h" -#include "ir/Padding.h" namespace onert { @@ -30,7 +28,7 @@ namespace ir namespace operation { -class L2Pool2D : public Operation +class ResizeNearestNeighbor : public Operation { public: enum Input @@ -40,20 +38,18 @@ public: struct Param { - Padding padding; - Stride stride; - uint32_t kw; - uint32_t kh; - Activation activation; + int32_t height_out; + int32_t width_out; + bool align_corners; }; public: - L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param); + ResizeNearestNeighbor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param); public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::L2Pool2D; } + OpCode opcode() const final { return OpCode::ResizeNearestNeighbor; } public: const Param &param() const { return _param; } @@ -66,4 +62,4 @@ private: } // namespace ir } // namespace onert -#endif // __ONERT_IR_OPERATION_L2_POOL_2D_H__ +#endif // __ONERT_IR_OPERATION_RESIZE_NEAREST_NEIGHBOR_H__ diff --git a/runtime/onert/core/include/ir/operation/SQRT.h b/runtime/onert/core/include/ir/operation/SQRT.h deleted file mode 100644 index 8563b1ab1..000000000 --- a/runtime/onert/core/include/ir/operation/SQRT.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
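In the rename above, the resize operation's Param drops the pooling attributes in favor of a static output size plus a corner-alignment flag. For illustration, with placeholder operand indices and fields following the new struct:

  // Sketch only: a 224x224 nearest-neighbor resize node (example values).
  using namespace onert::ir;
  operation::ResizeNearestNeighbor::Param param;
  param.height_out = 224;      // output height
  param.width_out = 224;       // output width
  param.align_corners = false; // do not force corner pixels to coincide
  operation::ResizeNearestNeighbor resize{OperandIndexSequence{OperandIndex{0}},
                                          OperandIndexSequence{OperandIndex{1}}, param};
  assert(resize.opcode() == OpCode::ResizeNearestNeighbor);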
- */ - -#ifndef __ONERT_IR_OPERATION_SQRT_H__ -#define __ONERT_IR_OPERATION_SQRT_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class SQRT : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::SQRT; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_SQRT_H__ diff --git a/runtime/onert/core/include/ir/operation/Select.h b/runtime/onert/core/include/ir/operation/Select.h index 400ac9d3e..33bf67886 100644 --- a/runtime/onert/core/include/ir/operation/Select.h +++ b/runtime/onert/core/include/ir/operation/Select.h @@ -41,7 +41,7 @@ public: public: void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Abs; } + OpCode opcode() const final { return OpCode::Select; } }; } // namespace operation diff --git a/runtime/onert/core/include/ir/operation/Sin.h b/runtime/onert/core/include/ir/operation/Sin.h deleted file mode 100644 index aef44ab2e..000000000 --- a/runtime/onert/core/include/ir/operation/Sin.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_SIN_H__ -#define __ONERT_IR_OPERATION_SIN_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Sin : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Sin; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_SIN_H__ diff --git a/runtime/onert/core/include/ir/operation/Sub.h b/runtime/onert/core/include/ir/operation/Sub.h deleted file mode 100644 index 0674e6e4d..000000000 --- a/runtime/onert/core/include/ir/operation/Sub.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_IR_OPERATION_SUB_H__ -#define __ONERT_IR_OPERATION_SUB_H__ - -#include "ir/Operation.h" -#include "ir/InternalType.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Sub : public Operation -{ -public: - enum Input - { - LHS = 0, - RHS - }; - - struct Param - { - Activation activation; - }; - -public: - Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Sub; } - -public: - const Param &param() const { return _param; } - -private: - Param _param; -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_SUB_H__ diff --git a/runtime/onert/core/include/ir/operation/Tanh.h b/runtime/onert/core/include/ir/operation/Tanh.h deleted file mode 100644 index 9b8d03bca..000000000 --- a/runtime/onert/core/include/ir/operation/Tanh.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_IR_OPERATION_TANH_H__ -#define __ONERT_IR_OPERATION_TANH_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class Tanh : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::Tanh; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_TANH_H__ diff --git a/runtime/onert/core/include/ir/operation/ZerosLike.h b/runtime/onert/core/include/ir/operation/ZerosLike.h deleted file mode 100644 index 7c2851858..000000000 --- a/runtime/onert/core/include/ir/operation/ZerosLike.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef __ONERT_IR_OPERATION_ZEROS_LIKE_H__ -#define __ONERT_IR_OPERATION_ZEROS_LIKE_H__ - -#include "ir/Operation.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -class ZerosLike : public Operation -{ -public: - enum Input - { - INPUT = 0 - }; - -public: - ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs); - -public: - void accept(OperationVisitor &v) const override; - OpCode opcode() const final { return OpCode::ZerosLike; } -}; - -} // namespace operation -} // namespace ir -} // namespace onert - -#endif // __ONERT_IR_OPERATION_ZEROS_LIKE_H__ diff --git a/runtime/onert/core/include/util/Config.lst b/runtime/onert/core/include/util/Config.lst index 1718e034c..5077fad69 100644 --- a/runtime/onert/core/include/util/Config.lst +++ b/runtime/onert/core/include/util/Config.lst @@ -20,7 +20,7 @@ // Name | Type | Default CONFIG(GRAPH_DOT_DUMP , int , "0") -CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon") +CONFIG(BACKENDS , std::string , "cpu;acl_cl;acl_neon;bcq") // FIXME Remove bcq CONFIG(OP_BACKEND_ALLOPS , std::string , "") CONFIG(OP_BACKEND_MAP , std::string , "") CONFIG(DISABLE_COMPILE , bool , "0") diff --git a/runtime/onert/backend/cpu/ops/ExpLayer.h b/runtime/onert/core/include/util/Exceptions.h index cd27b0e40..fc3fa0f64 100644 --- a/runtime/onert/backend/cpu/ops/ExpLayer.h +++ b/runtime/onert/core/include/util/Exceptions.h @@ -14,44 +14,35 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ -#define __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ +#ifndef __ONERT_UTIL_ONERTEXCEPTION_H__ +#define __ONERT_UTIL_ONERTEXCEPTION_H__ -#include <backend/IPortableTensor.h> - -#include <exec/IFunction.h> +#include <string> namespace onert { -namespace backend -{ -namespace cpu -{ -namespace ops -{ -class ExpLayer : public ::onert::exec::IFunction +class OnertException : public std::exception { public: - ExpLayer(); - -public: - void expFloat32(); + OnertException(const std::string &msg) : _msg{msg} {} + OnertException(const std::string &tag, const std::string &msg) : _msg{tag + " : " + msg} {} - void expQuant8(); - - void configure(const IPortableTensor *input, IPortableTensor *output); - - void run() override; + const char *what() const noexcept override { return _msg.c_str(); } private: - const IPortableTensor *_input; - IPortableTensor *_output; + std::string _msg; +}; + +class InsufficientBufferSizeException : public OnertException +{ +public: + InsufficientBufferSizeException(const std::string &msg) + : OnertException{"InsufficientBufferSize", msg} + { + } }; -} // namespace ops -} // namespace cpu -} // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_CPU_OPS_EXPLAYER_H__ +#endif // __ONERT_UTIL_ONERTEXCEPTION_H__ diff --git a/runtime/onert/core/include/util/ShapeInference.h b/runtime/onert/core/include/util/ShapeInference.h index a68c22b16..1ebed48f2 100644 --- a/runtime/onert/core/include/util/ShapeInference.h +++ b/runtime/onert/core/include/util/ShapeInference.h @@ -19,15 +19,13 @@ #include "Utils.h" -#include "ir/operation/AvgPool2D.h" #include "ir/operation/Concat.h" -#include "ir/operation/MaxPool2D.h" #include "ir/operation/Conv2D.h" #include "ir/operation/DepthwiseConv2D.h" +#include "ir/operation/Pool2D.h" #include "ir/operation/Reshape.h" -#include "ir/operation/RSQRT.h" #include "ir/operation/StridedSlice.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" #include "ir/Index.h" #include "ir/Layout.h" #include "ir/OperationVisitor.h" @@ -46,8 +44,6 @@ 
using Shapes = std::vector<ir::Shape>; ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank); -ir::Shape inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param, - ir::Layout layout = ir::Layout::NHWC); ir::Shape inferBatchMatMulShape(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape, const ir::operation::BatchMatMul::Param &param); @@ -74,15 +70,15 @@ ir::Shape inferFullyConnectedShape(const ir::Shape &in_shape, const ir::Shape &k ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indices_shape, int axis, int rank); -ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param, - ir::Layout layout = ir::Layout::NHWC); - ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis); ir::Shape inferPackShape(const ir::Shape &input_shape, int axis, int rank, int num); ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const size_t num_pads); +ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param, + ir::Layout layout = ir::Layout::NHWC); + template <typename T> ir::Shape inferRangeShape(T start_val, T limit_val, T delta_val); ir::Shape inferReshapeShape(const int32_t *shape_buf, const int32_t shape_num_elements, diff --git a/runtime/onert/core/src/backend/controlflow/Backend.h b/runtime/onert/core/src/backend/controlflow/Backend.h index 3c7325912..670f7750f 100644 --- a/runtime/onert/core/src/backend/controlflow/Backend.h +++ b/runtime/onert/core/src/backend/controlflow/Backend.h @@ -21,6 +21,7 @@ #include "ConstantInitializer.h" #include "KernelGenerator.h" #include "TensorBuilder.h" +#include "Tensor.h" #include <backend/Backend.h> @@ -63,10 +64,12 @@ public: // there is no such case until now, let's support it later // TODO Remove TensorBuilder and ConstantInitializer // TODO Support Consecutive controflow operation's intermediate tensor - auto tb = std::make_shared<TensorBuilder>(); + auto tr = std::make_shared<TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; context->tensor_builder = tb; - context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tb); - context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb); + context->constant_initializer = std::make_shared<ConstantInitializer>(operands, tr); + context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb->dynamicTensorManager(), tr); context->tensor_register = nullptr; context->optimizer = nullptr; return context; diff --git a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h index 35cc7835e..e21a8f357 100644 --- a/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h +++ b/runtime/onert/core/src/backend/controlflow/ConstantInitializer.h @@ -17,7 +17,7 @@ #ifndef __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_CONTROLFLOW_CONSTANT_INITIALIZER_H__ -#include "TensorBuilder.h" +#include "TensorRegistry.h" #include <backend/IConstantInitializer.h> #include <ir/Operands.h> @@ -33,16 +33,16 @@ class ConstantInitializer : public IConstantInitializer { public: ConstantInitializer(const ir::Operands &operands, - const std::shared_ptr<TensorBuilder> &tensor_builder) - : IConstantInitializer{operands}, _tensor_builder{tensor_builder} + const std::shared_ptr<ITensorRegistry> &tensor_reg) + : IConstantInitializer{operands}, _tensor_reg{tensor_reg}
{ } private: - std::shared_ptr<ITensorBuilder> tensor_builder() const override { return _tensor_builder; } + std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; } private: - std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<ITensorRegistry> _tensor_reg; }; } // namespace controlflow diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc index e538f3fd3..1288e4c96 100644 --- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc +++ b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.cc @@ -17,6 +17,8 @@ #include "DynamicTensorManager.h" #include "util/logging.h" +#include "util/Exceptions.h" +#include "ir/DataType.h" namespace onert { @@ -25,10 +27,8 @@ namespace backend namespace controlflow { -DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg, - const std::shared_ptr<UserTensorRegistry> &user_reg) - : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{reg}, - _user_tensors{user_reg} +DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors) + : _dynamic_mem_mgr{new cpu_common::DynamicMemoryManager()}, _tensors{tensors} { // DO NOTHING } @@ -36,20 +36,20 @@ DynamicTensorManager::DynamicTensorManager(const std::shared_ptr<cpu_common::Ten void DynamicTensorManager::applyShape(const ir::OperandIndex &ind, const ir::Shape &new_shape) { // NOTE Handle user tensors first - auto user_tensor = _user_tensors->getNativeTensor(ind); + auto user_tensor = _tensors->getNativeUserTensor(ind); if (user_tensor) { // User tensors cannot be reallocated. auto buffer_size = user_tensor->total_size(); auto new_size = new_shape.num_elements() * sizeOfDataType(user_tensor->data_type()); if (buffer_size < new_size) - throw std::runtime_error{"ExecutorBase: output buffer size is less than output tensor size"}; + throw InsufficientBufferSizeException{"Output buffer size is less than output tensor size"}; user_tensor->setShape(new_shape); return; } - // NOTE Then handle native tensors - auto tensor = _tensors->getNativeTensor(ind); + // NOTE Then handle own tensors + auto tensor = _tensors->getNativeOwnTensor(ind); assert(tensor); bool previously_dynamic = tensor->is_dynamic(); @@ -102,24 +102,13 @@ void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &tensor_info, ir::Layout backend_layout) { - assert(_tensors->getNativeTensor(ind) == nullptr); auto tensor = std::make_shared<cpu_common::Tensor>(tensor_info, backend_layout, this); - _tensors->setNativeTensor(ind, tensor); + _tensors->setNativeOwnTensor(ind, tensor); } void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) { - auto find = _dealloc_tensor_map.find(op_ind); - if (find != _dealloc_tensor_map.end()) - { - auto &input_set = find->second; - input_set.emplace(operand_ind); - } - else - { - _dealloc_tensor_map.emplace( - std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind})); - } + _dealloc_tensor_map[op_ind].emplace(operand_ind); } void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind) diff --git a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h index 446427d64..dbe388ba2 100644 --- a/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h +++
b/runtime/onert/core/src/backend/controlflow/DynamicTensorManager.h @@ -17,11 +17,11 @@ #ifndef __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__ #define __ONERT_BACKEND_CONTROLFLOW_DYNAMICTENSOR_MANAGER_H__ -#include "UserTensorRegistry.h" +#include "TensorRegistry.h" +#include "Tensor.h" #include <backend/IDynamicTensorManager.h> #include <backend/cpu_common/MemoryManager.h> -#include <backend/cpu_common/TensorRegistry.h> #include <ir/OperandInfo.h> #include <ir/Operation.h> #include <ir/Index.h> @@ -33,16 +33,13 @@ namespace backend namespace controlflow { -// TODO Find optimized algorithm to manage memory. - /** * @brief Class to manage dynamic tensor and its memory */ class DynamicTensorManager : public backend::IDynamicTensorManager { public: - DynamicTensorManager(const std::shared_ptr<cpu_common::TensorRegistry> &reg, - const std::shared_ptr<UserTensorRegistry> &user_reg); + DynamicTensorManager(const std::shared_ptr<TensorRegistry> &tensors); virtual ~DynamicTensorManager() = default; @@ -61,9 +58,7 @@ private: * @todo DynamicMemoryManager is not optimized. Optimized one is needed */ std::shared_ptr<cpu_common::DynamicMemoryManager> _dynamic_mem_mgr; - // TODO Refactoring : Merge two TensorRegistries into one - const std::shared_ptr<cpu_common::TensorRegistry> _tensors; - const std::shared_ptr<UserTensorRegistry> _user_tensors; + const std::shared_ptr<TensorRegistry> _tensors; // contains list of dynamic tensor index, which can be deallocated after running operation // note: this map could contain static tensor index too. Careful use is required. diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc index eb83b7de4..de5a6a5f6 100644 --- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc +++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.cc @@ -31,24 +31,24 @@ namespace backend namespace controlflow { -KernelGenerator::KernelGenerator(const ir::Graph &graph, - const std::shared_ptr<TensorBuilder> &tensor_builder) - : _graph{graph}, _tensor_builder{tensor_builder}, _tensor_builder_set{}, _executor_map{nullptr} +KernelGenerator::KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager, + const std::shared_ptr<TensorRegistry> &tensor_reg) + : _graph{graph}, _dyn_tensor_manager{dyn_tensor_manager}, _tensor_reg{tensor_reg}, + _tensor_registries{}, _executor_map{nullptr} { UNUSED_RELEASE(_graph); - UNUSED_RELEASE(_tensor_builder_set); + UNUSED_RELEASE(_tensor_registries); UNUSED_RELEASE(_executor_map); } void KernelGenerator::visit(const ir::OpSequence &op_seq) { assert(!_return_fn_seq); - assert(_tensor_builder->dynamicTensorManager()); - assert(_tensor_builder->tensorRegistry()); + assert(_dyn_tensor_manager); + assert(_tensor_reg); - auto dyn_tensor_manager = _tensor_builder->dynamicTensorManager(); - auto dyn_shape_inferer = std::make_unique<exec::DynamicShapeInferer>( - _graph.operands(), dyn_tensor_manager, _tensor_builder->tensorRegistry()); + auto dyn_shape_inferer = + std::make_unique<exec::DynamicShapeInferer>(_graph.operands(), _tensor_reg); _return_fn_seq = std::make_unique<exec::FunctionSequence>(); @@ -58,8 +58,8 @@ void KernelGenerator::visit(const ir::OpSequence &op_seq) dyn_ctx->op_seq = &op_seq; dyn_ctx->operations = &_graph.operations(); dyn_ctx->dynamic_shape_inferer = std::move(dyn_shape_inferer); - dyn_ctx->tensor_registry = _tensor_builder->tensorRegistry(); - dyn_ctx->dynamic_tensor_manager =
_tensor_builder->dynamicTensorManager(); + dyn_ctx->tensor_registry = _tensor_reg; + dyn_ctx->dynamic_tensor_manager = _dyn_tensor_manager; _return_fn_seq->dynamic_tensor_ctx(dyn_ctx); } @@ -93,12 +93,7 @@ void KernelGenerator::visit(const ir::operation::If &node) auto output_tensor = getTensor(output_index); output_tensors.emplace_back(output_tensor); - const auto output_tensor_builder = getTensorBuilder(output_index); - if (output_tensor_builder->supportDynamicTensor()) - { - auto output_dyn_manager = output_tensor_builder->dynamicTensorManager(); - outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager}; - } + outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index}; } // IfLayer just set ExecutorMap instead of then and else executor to avoid complexity of @@ -121,14 +116,7 @@ void KernelGenerator::visit(const ir::operation::Permute &node) std::vector<std::shared_ptr<ITensor>> output_tensors{getTensor(output_index)}; std::vector<std::shared_ptr<ITensor>> input_tensors{getTensor(input_index)}; std::unordered_map<std::shared_ptr<ITensor>, exec::DynAllocInfo> outputs_dyn_alloc_info; - const auto output_tensor_builder = getTensorBuilder(output_index); - VERBOSE(PERMUTE_FIND_TB) << output_index << " -> " << output_tensor_builder.get() << std::endl; - assert(output_tensor_builder != nullptr); - if (output_tensor_builder->supportDynamicTensor()) - { - outputs_dyn_alloc_info[output_tensors.at(0)] = - exec::DynAllocInfo{output_index, output_tensor_builder->dynamicTensorManager()}; - } + outputs_dyn_alloc_info[output_tensors.at(0)] = exec::DynAllocInfo{output_index}; auto fn = std::make_unique<kernel::PermuteLayer>(input_tensors, output_tensors, outputs_dyn_alloc_info); @@ -159,12 +147,7 @@ void KernelGenerator::visit(const ir::operation::While &node) output_tensors.emplace_back(output_tensor); - const auto output_tensor_builder = getTensorBuilder(output_index); - if (output_tensor_builder->supportDynamicTensor()) - { - auto output_dyn_manager = output_tensor_builder->dynamicTensorManager(); - outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index, output_dyn_manager}; - } + outputs_dyn_alloc_info[output_tensor] = exec::DynAllocInfo{output_index}; } // WhileLayer just set ExecutorMap instead of cond and body executor to avoid complexity of @@ -178,34 +161,7 @@ void KernelGenerator::visit(const ir::operation::While &node) std::shared_ptr<backend::ITensor> KernelGenerator::getTensor(const ir::OperandIndex &index) { - std::shared_ptr<backend::ITensor> ret; - for (auto tensor_builder : _tensor_builder_set) - { - auto tensor = tensor_builder->tensorAt(index); - if (tensor) - { - ret = tensor; - break; - } - } - assert(ret != nullptr); - return ret; -} - -std::shared_ptr<backend::ITensorBuilder> -KernelGenerator::getTensorBuilder(const ir::OperandIndex &index) -{ - std::shared_ptr<backend::ITensorBuilder> ret; - for (auto tensor_builder : _tensor_builder_set) - { - auto reg = tensor_builder->tensorRegistry(); - auto tensor = reg ? 
reg->getNativeITensor(index) : tensor_builder->tensorAt(index); - if (tensor) - { - ret = tensor_builder; - break; - } - } + std::shared_ptr<backend::ITensor> ret = _tensor_registries.getITensor(index); assert(ret != nullptr); return ret; } diff --git a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h index 1fc77935c..b84a810e4 100644 --- a/runtime/onert/core/src/backend/controlflow/KernelGenerator.h +++ b/runtime/onert/core/src/backend/controlflow/KernelGenerator.h @@ -22,9 +22,8 @@ #include <exec/IExecutor.h> #include <ir/Graph.h> #include "TensorBuilder.h" -#include "compiler/TensorBuilders.h" - -#include "compiler/TensorBuilders.h" +#include "compiler/TensorRegistries.h" +#include "TensorRegistry.h" namespace onert { @@ -36,11 +35,12 @@ namespace controlflow class KernelGenerator : public IKernelGenerator { public: - KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder); + KernelGenerator(const ir::Graph &graph, IDynamicTensorManager *dyn_tensor_manager, + const std::shared_ptr<TensorRegistry> &tensor_reg); - void setTensorBuilderSet(const compiler::TensorBuilders &tensor_builder_set) + void setTensorRegistries(const compiler::TensorRegistries &tensor_registries) { - _tensor_builder_set = tensor_builder_set; + _tensor_registries = tensor_registries; } void setExecutorMap(const std::shared_ptr<exec::ExecutorMap> &executor_map) { @@ -57,12 +57,12 @@ public: private: std::shared_ptr<backend::ITensor> getTensor(const ir::OperandIndex &index); - std::shared_ptr<backend::ITensorBuilder> getTensorBuilder(const ir::OperandIndex &index); private: const ir::Graph &_graph; - std::shared_ptr<TensorBuilder> _tensor_builder; - compiler::TensorBuilders _tensor_builder_set; + IDynamicTensorManager *_dyn_tensor_manager; + std::shared_ptr<TensorRegistry> _tensor_reg; + compiler::TensorRegistries _tensor_registries; exec::ExecutorMap *_executor_map; }; diff --git a/runtime/onert/core/src/ir/operation/Log.cc b/runtime/onert/core/src/backend/controlflow/Tensor.h index 85598bc87..ba5bafd75 100644 --- a/runtime/onert/core/src/ir/operation/Log.cc +++ b/runtime/onert/core/src/backend/controlflow/Tensor.h @@ -14,26 +14,22 @@ * limitations under the License. 
*/ -#include "ir/operation/Log.h" +#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__ +#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__ -#include <cassert> - -#include "ir/OperationVisitor.h" +#include <backend/cpu_common/Tensor.h> namespace onert { -namespace ir +namespace backend { -namespace operation +namespace controlflow { -void Log::accept(OperationVisitor &v) const { v.visit(*this); } - -Log::Log(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} +using Tensor = cpu_common::Tensor; -} // namespace operation -} // namespace ir +} // namespace controlflow +} // namespace backend } // namespace onert + +#endif // __ONERT_BACKEND_CONTROLFLOW_TENSOR_H__ diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc index 5bddb9185..e5c3f5fd5 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc +++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.cc @@ -27,10 +27,10 @@ namespace backend namespace controlflow { -TensorBuilder::TensorBuilder() - : _tensor_reg{new cpu_common::TensorRegistry()}, _user_tensor_reg{new UserTensorRegistry()}, - _static_tensor_mgr{new cpu_common::StaticTensorManager(_tensor_reg)}, - _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg, _user_tensor_reg)} +TensorBuilder::TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg) + : _tensor_reg{tensor_reg}, _dynamic_tensor_mgr{new DynamicTensorManager(_tensor_reg)}, + _static_tensor_mgr{ + new cpu_common::StaticTensorManager(_tensor_reg->base_reg(), _dynamic_tensor_mgr.get())} { /* empty */ } @@ -54,10 +54,13 @@ void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::Op void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) { - assert(_tensor_info_map.find(ind) != _tensor_info_map.end()); + // TODO Enhance the way of checking user tensors + if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors + return; + const auto tensor_info = _tensor_info_map.at(ind); - if (!at(ind)->is_dynamic()) + if (!nativeOwnTensorAt(ind)->is_dynamic()) { const auto size = tensor_info.total_size(); _static_tensor_mgr->claimPlan(ind, size); @@ -66,7 +69,11 @@ void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind) void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) { - if (!at(ind)->is_dynamic()) + // TODO Enhance the way of checking user tensors + if (_tensor_info_map.find(ind) == _tensor_info_map.end()) // Do not proceed for user tensors + return; + + if (!nativeOwnTensorAt(ind)->is_dynamic()) { _static_tensor_mgr->releasePlan(ind); } @@ -74,6 +81,11 @@ void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind) bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const { + // User tensors are not registered in _tensor_info_map but objects for them are exist + // in the tensor registry. + // TODO Enhance the way of checking user tensors + if (_tensor_reg->getITensor(ind)) + return true; return _tensor_info_map.find(ind) != _tensor_info_map.end(); } @@ -89,25 +101,9 @@ void TensorBuilder::allocate() // This is because CPU kernels require `ITensor`s to be allocated before Kernel Generation. 
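// The notifyFirstUse/notifyLastUse guards above implement a claim/release
// liveness protocol on the static memory planner, now skipping user tensors.
// A sketch of the intended call order for one operand index `ind`; the calls
// are the ones shown in this file, the driver code around them is hypothetical:
//
//   tensor_builder->registerTensorInfo(ind, info, layout); // declare the tensor
//   tensor_builder->notifyFirstUse(ind);  // static tensor: claimPlan(ind, size)
//   /* ... operations that read or write `ind` run here ... */
//   tensor_builder->notifyLastUse(ind);   // static tensor: releasePlan(ind)
//   tensor_builder->prepare();            // plan memory over all claims
//   tensor_builder->allocate();           // back static tensors with memory
//
// Dynamic tensors bypass claim/release entirely (see the is_dynamic() checks).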
} -std::shared_ptr<ITensor> TensorBuilder::tensorAt(const ir::OperandIndex &ind) -{ - // NOTE Find from User Tensor Registry first - // FIXME There may be both user tensor and native tensor for a `ind` which is a waste - auto user_tensor = _user_tensor_reg->getITensor(ind); - auto tensor = _tensor_reg->getITensor(ind); - if (user_tensor) - { - return user_tensor; - } - else - return tensor; -} - -void TensorBuilder::iterate(const IterateFunction &fn) { _static_tensor_mgr->iterate(fn); } - -std::shared_ptr<cpu_common::Tensor> TensorBuilder::at(const ir::OperandIndex &ind) +std::shared_ptr<cpu_common::Tensor> TensorBuilder::nativeOwnTensorAt(const ir::OperandIndex &ind) { - return _tensor_reg->getNativeTensor(ind); + return _tensor_reg->getNativeOwnTensor(ind); } std::unique_ptr<ITensorManager> TensorBuilder::releaseStaticTensorManager(void) @@ -120,10 +116,10 @@ std::unique_ptr<ITensorManager> TensorBuilder::releaseDynamicTensorManager(void) return std::move(_dynamic_tensor_mgr); } -void TensorBuilder::setUserTensor(const ir::OperandIndex &ind, - const std::shared_ptr<UserTensor> &tensor) +void TensorBuilder::setNativeUserTensor(const ir::OperandIndex &ind, + const std::shared_ptr<UserTensor> &tensor) { - _user_tensor_reg->setNativeTensor(ind, tensor); + _tensor_reg->setNativeUserTensor(ind, tensor); } } // namespace controlflow diff --git a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h index 9f2bb3754..2f2a2c47e 100644 --- a/runtime/onert/core/src/backend/controlflow/TensorBuilder.h +++ b/runtime/onert/core/src/backend/controlflow/TensorBuilder.h @@ -39,9 +39,7 @@ namespace controlflow class TensorBuilder : public ITensorBuilder { public: - TensorBuilder(); - - bool supportDynamicTensor() override { return true; } + TensorBuilder(const std::shared_ptr<TensorRegistry> &tensor_reg); /** * @brief Register tensor information to allocate on CPU backend @@ -61,15 +59,6 @@ public: void allocate() override; void postFunctionPrepare() override { /* DO NOTHING */} - /** - * @brief Get tensor with a specific OperandIndex - * - * @return shared_ptr<ITensor> if a tensor with given OperandIndex exists. nullptr otherwise. - */ - std::shared_ptr<ITensor> tensorAt(const ir::OperandIndex &ind) override; - - void iterate(const IterateFunction &fn) override; - std::unique_ptr<ITensorManager> releaseStaticTensorManager(void) override; IDynamicTensorManager *dynamicTensorManager(void) override { return _dynamic_tensor_mgr.get(); } @@ -82,16 +71,13 @@ public: * If not, program will crash with assert or exception. 
* @return shared_ptr<operand::Tensor> */ - std::shared_ptr<cpu_common::Tensor> at(const ir::OperandIndex &ind); - void setUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor); - - std::shared_ptr<ITensorRegistry> tensorRegistry() override { return _tensor_reg; } + std::shared_ptr<cpu_common::Tensor> nativeOwnTensorAt(const ir::OperandIndex &ind); + void setNativeUserTensor(const ir::OperandIndex &ind, const std::shared_ptr<UserTensor> &tensor); private: - const std::shared_ptr<cpu_common::TensorRegistry> _tensor_reg; - const std::shared_ptr<UserTensorRegistry> _user_tensor_reg; - std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr; + const std::shared_ptr<TensorRegistry> _tensor_reg; std::unique_ptr<DynamicTensorManager> _dynamic_tensor_mgr; + std::unique_ptr<cpu_common::StaticTensorManager> _static_tensor_mgr; ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; ir::OperandIndexMap<ir::Layout> _tensor_layout_map; }; diff --git a/runtime/onert/core/src/backend/controlflow/TensorRegistry.h b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h new file mode 100644 index 000000000..678c5b73b --- /dev/null +++ b/runtime/onert/core/src/backend/controlflow/TensorRegistry.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__ +#define __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__ + +#include "backend/cpu_common/TensorRegistry.h" +#include "backend/ITensorRegistry.h" +#include "Tensor.h" +#include "UserTensor.h" +#include <assert.h> + +namespace onert +{ +namespace backend +{ +namespace controlflow +{ + +/** + * @brief Tensor registry class for controlflow backend + * + * This class contains three types of tensors. Two native tensors(tensors that are managed by this + * backend) and the other is migrant tensor. 
+ * + * - NativeUserTensor - @c UserTensor managed by this backend, buffer is user-given + * - NativeOwnTensor - @c cpu_common::Tensor managed by this backend ( in @c _base_reg ) + * - MigrantTensor - @c IPortableTensor managed by other backends ( in @c _base_reg ) + * + * @note @c _base_reg is used in implementation to reuse @c cpu_common::StaticTensorManager + * + */ +class TensorRegistry : public ITensorRegistry +{ +public: + TensorRegistry() : _base_reg{new cpu_common::TensorRegistry} {} + + std::shared_ptr<ITensor> getITensor(const ir::OperandIndex &ind) override + { + auto base_tensor = _base_reg->getITensor(ind); + if (base_tensor) + return base_tensor; + return getNativeUserTensor(ind); + } + + std::shared_ptr<ITensor> getNativeITensor(const ir::OperandIndex &ind) override + { + auto base_tensor = _base_reg->getNativeITensor(ind); + if (base_tensor) + return base_tensor; + return getNativeUserTensor(ind); + } + + std::shared_ptr<IPortableTensor> getPortableTensor(const ir::OperandIndex &ind) + { + auto base_tensor = _base_reg->getPortableTensor(ind); + if (base_tensor) + return base_tensor; + return getNativeUserTensor(ind); + } + + std::shared_ptr<IPortableTensor> getNativeTensor(const ir::OperandIndex &ind) + { + auto base_tensor = _base_reg->getNativeTensor(ind); + if (base_tensor) + return base_tensor; + return getNativeUserTensor(ind); + } + + std::shared_ptr<Tensor> getNativeOwnTensor(const ir::OperandIndex &ind) + { + return _base_reg->getNativeTensor(ind); + } + + std::shared_ptr<UserTensor> getNativeUserTensor(const ir::OperandIndex &ind) + { + auto tensor = _native_user_tensors.find(ind); + if (tensor != _native_user_tensors.end()) + return tensor->second; + return nullptr; + } + + bool setMigrantTensor(const ir::OperandIndex &ind, + const std::shared_ptr<IPortableTensor> &tensor) override + { + assert(tensor); + assert(!getITensor(ind)); // For the ind, tensor is not registered yet + _base_reg->setMigrantTensor(ind, tensor); + return true; + } + + void setNativeOwnTensor(ir::OperandIndex ind, const std::shared_ptr<Tensor> &tensor) + { + assert(tensor); + assert(!getITensor(ind)); // For the ind, tensor is not registered yet + _base_reg->setNativeTensor(ind, tensor); + } + + void setNativeUserTensor(ir::OperandIndex ind, const std::shared_ptr<UserTensor> &tensor) + { + assert(tensor); + assert(!getITensor(ind)); // For the ind, tensor is not registered yet + _native_user_tensors[ind] = tensor; + } + + const ir::OperandIndexMap<std::shared_ptr<UserTensor>> &native_user_tensors() + { + return _native_user_tensors; + } + std::shared_ptr<cpu_common::TensorRegistry> base_reg() { return _base_reg; } + +private: + std::shared_ptr<cpu_common::TensorRegistry> _base_reg; + ir::OperandIndexMap<std::shared_ptr<UserTensor>> _native_user_tensors; +}; + +} // namespace controlflow +} // namespace backend +} // namespace onert + +#endif // ifndef __ONERT_BACKEND_CONTROLFLOW_TENSOR_REGISTRY_H__ diff --git a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc index 3c095b38c..e8f1ea679 100644 --- a/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc +++ b/runtime/onert/core/src/backend/controlflow/kernel/PermuteLayer.cc @@ -55,7 +55,11 @@ void PermuteLayer::run() try { const auto dst_index = _dst_dyn_alloc_info_map.at(dst_tensor).ind; - _dst_dyn_alloc_info_map.at(dst_tensor).dyn_tensor_manager->applyShape(dst_index, new_shape); + auto dyn_tensor_manager = dst_tensor->dynamic_tensor_manager(); + 
if (!dyn_tensor_manager) + throw std::runtime_error{ + "Error: PermuteLayer: output's TensorManager does not support dynamic tensor"}; + dyn_tensor_manager->applyShape(dst_index, new_shape); assert(dst_tensor->buffer() != nullptr); } catch (const std::out_of_range &e) diff --git a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc index cb27d757f..f7ce3d011 100644 --- a/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc +++ b/runtime/onert/core/src/backend/cpu_common/DynamicTensorManager.cc @@ -95,17 +95,7 @@ void DynamicTensorManager::buildTensor(const ir::OperandIndex &ind, void DynamicTensorManager::planDealloc(ir::OperationIndex op_ind, ir::OperandIndex operand_ind) { - auto find = _dealloc_tensor_map.find(op_ind); - if (find != _dealloc_tensor_map.end()) - { - auto &input_set = find->second; - input_set.emplace(operand_ind); - } - else - { - _dealloc_tensor_map.emplace( - std::make_pair(op_ind, std::unordered_set<ir::OperandIndex>{operand_ind})); - } + _dealloc_tensor_map[op_ind].emplace(operand_ind); } void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind) diff --git a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc index 820cad38a..440f70c93 100644 --- a/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc +++ b/runtime/onert/core/src/backend/cpu_common/StaticTensorManager.cc @@ -26,8 +26,10 @@ namespace backend namespace cpu_common { -StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg) - : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg} +StaticTensorManager::StaticTensorManager(const std::shared_ptr<TensorRegistry> &reg, + IDynamicTensorManager *dynamic_tensor_manager) + : _const_mgr{new DynamicMemoryManager()}, _nonconst_mgr{new MemoryManager()}, _tensors{reg}, + _dynamic_tensor_manager{dynamic_tensor_manager} { // DO NOTHING } @@ -78,7 +80,7 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind, bool as_const) { assert(!_tensors->getNativeTensor(ind)); - auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, nullptr); + auto tensor = std::make_shared<Tensor>(tensor_info, backend_layout, _dynamic_tensor_manager); _tensors->setNativeTensor(ind, tensor); _as_constants[ind] = as_const; } diff --git a/runtime/onert/core/src/compiler/Compiler.cc b/runtime/onert/core/src/compiler/Compiler.cc index 33b428a4b..93dbbc3b5 100644 --- a/runtime/onert/core/src/compiler/Compiler.cc +++ b/runtime/onert/core/src/compiler/Compiler.cc @@ -134,6 +134,12 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) backend::controlflow::Config::ID; } + // FIXME This is a workaround for bcq operations, should remove it + { + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQFullyConnected] = "bcq"; + _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq"; + } + { VERBOSE(Compiler) << std::boolalpha; VERBOSE(Compiler) << "==== Compiler Options ====" << std::endl; @@ -181,14 +187,14 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void) auto dump_level = static_cast<dumper::dot::DotDumper::Level>(_options.graph_dump_level); // Lower: Assign backend - std::unordered_map<ir::SubgraphIndex, std::unique_ptr<ir::LoweredGraph>> lowered_subgs; + std::unordered_map<ir::SubgraphIndex, std::unique_ptr<compiler::LoweredGraph>> lowered_subgs;
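The bcq workaround above relies on the manual scheduler's opcode-to-backend table: an operation whose opcode has an entry is pinned to the named backend instead of going through normal backend selection. A sketch of how such an entry would be consulted; resolveBackendId is a hypothetical helper rather than compiler API, and ManualSchedulerOptions is assumed to be the type of manual_scheduler_options holding the opcode_to_backend map used above:

  // Sketch only: opcode-based manual backend pinning with a fallback.
  std::string resolveBackendId(const onert::compiler::ManualSchedulerOptions &opts,
                               onert::ir::OpCode opcode, const std::string &fallback)
  {
    auto it = opts.opcode_to_backend.find(opcode);
    if (it != opts.opcode_to_backend.end())
      return it->second; // e.g. OpCode::BCQGather -> "bcq"
    return fallback;     // otherwise keep the normally chosen backend
  }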
_subgraphs->iterate([&](const ir::SubgraphIndex &index, ir::Graph &subg) { _options.is_primary_subgraph = (index == ir::SubgraphIndex{0}); onert::dumper::dot::DotDumper dot_dumper(subg, dump_level); dot_dumper.dump(nnfw::misc::str("before_lower_subg-", index.value())); // Lower: Assign backend - lowered_subgs[index] = std::make_unique<ir::LoweredGraph>(subg, _options); + lowered_subgs[index] = std::make_unique<compiler::LoweredGraph>(subg, _options); // Check backend(s) for subgraph support FP16 bool backends_support_fp16 = true; diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.cc b/runtime/onert/core/src/compiler/ExecutorFactory.cc index 82afd9e56..062c6c9c3 100644 --- a/runtime/onert/core/src/compiler/ExecutorFactory.cc +++ b/runtime/onert/core/src/compiler/ExecutorFactory.cc @@ -25,6 +25,7 @@ #include "compiler/ExecutionBuilder.h" #include "exec/ExecTime.h" #include "compiler/Linear.h" +#include "compiler/TensorBuilders.h" #include "backend/IConstantInitializer.h" #include "backend/IKernelGenerator.h" #include "backend/IOptimizer.h" @@ -64,6 +65,23 @@ private: std::shared_ptr<backend::IConfig> _config; }; +// TODO Think of a better way to manage TensorManagers +backend::TensorManagerSet createTensorManagerSet(const compiler::TensorBuilders &tensor_builders) +{ + backend::TensorManagerSet tensor_mgrs; + for (auto &tensor_builder : tensor_builders) + { + auto s_tensor_manager = tensor_builder->releaseStaticTensorManager(); + if (s_tensor_manager != nullptr) + tensor_mgrs.insert(std::move(s_tensor_manager)); + + auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager(); + if (d_tensor_manager != nullptr) + tensor_mgrs.insert(std::move(d_tensor_manager)); + } + return tensor_mgrs; +} + } // namespace } // namespace onert @@ -87,14 +105,14 @@ ExecutorFactory::ExecutorFactory() std::placeholders::_3, true); } -exec::IExecutor *ExecutorFactory::create(std::unique_ptr<ir::LoweredGraph> lowered_graph, +exec::IExecutor *ExecutorFactory::create(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options, const std::shared_ptr<exec::ExecutorMap> &executor_map) { return _map.at(options.executor)(std::move(lowered_graph), options, executor_map); } -void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph) +void ExecutorFactory::initializeBackendContext(compiler::LoweredGraph *lowered_graph) { struct Entry { @@ -132,7 +150,7 @@ void ExecutorFactory::initializeBackendContext(ir::LoweredGraph *lowered_graph) } } -void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph, +void ExecutorFactory::runTensorRegistration(compiler::LoweredGraph *lowered_graph, const std::vector<ir::OpSequenceIndex> &order) { for (const auto index : order) @@ -141,6 +159,8 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph, const auto backend = lowered_graph->getLowerInfo(index)->backend(); const auto tensor_register = lowered_graph->backend_contexts().at(backend)->tensor_register; auto tensor_builder = lowered_graph->backend_contexts().at(backend)->tensor_builder; + auto model_io = lowered_graph->graph().getInputs() + lowered_graph->graph().getOutputs(); + if (tensor_register) { // Custom registration @@ -154,7 +174,7 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph, const auto &op = lowered_graph->graph().operations().at(op_idx); for (const auto &index : (op.getInputs() | ir::Remove::UNDEFINED) + op.getOutputs()) { - if (!tensor_builder->isRegistered(index)) + if 
(!tensor_builder->isRegistered(index) && !model_io.contains(index)) { const auto &operand_lower_info = lowered_graph->getLowerInfo(index)->def_factors().getOnlyElement(); @@ -181,15 +201,28 @@ void ExecutorFactory::runTensorRegistration(ir::LoweredGraph *lowered_graph, } std::vector<std::shared_ptr<backend::ITensor>> -ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph, +ExecutorFactory::initializeModelIOTensors(compiler::LoweredGraph &lowered_graph, const ir::OperandIndexSequence &indices) { std::vector<std::shared_ptr<backend::ITensor>> ret; - TensorBuilders tensor_builders{lowered_graph.backend_contexts(), false}; - std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder = - tensor_builders.getControlflowTensorBuilder(); + // TODO Store controlflow backend in BackendContext + std::shared_ptr<backend::controlflow::TensorBuilder> cf_tensor_builder; + std::shared_ptr<backend::controlflow::TensorRegistry> cf_tensor_reg; + for (const auto &e : lowered_graph.backend_contexts()) + { + auto backend = e.first; + auto &context = e.second; + if (backend->config()->id() == backend::controlflow::Config::ID) + { + cf_tensor_builder = + std::dynamic_pointer_cast<backend::controlflow::TensorBuilder>(context->tensor_builder); + cf_tensor_reg = + std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(context->tensor_registry); + } + } assert(cf_tensor_builder); + assert(cf_tensor_reg); for (auto ind : indices) { @@ -200,15 +233,16 @@ ExecutorFactory::initializeModelIOTensors(ir::LoweredGraph &lowered_graph, cf_tensor_builder->dynamicTensorManager()); // Add tensor to controlflow TensorRegistry. - cf_tensor_builder->setUserTensor(ind, tensor); + cf_tensor_reg->setNativeUserTensor(ind, tensor); ret.push_back(tensor); } return ret; } -void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph, - TensorBuilders &tensor_builders) +void ExecutorFactory::prepareExternalTensors(compiler::LoweredGraph &lowered_graph) { + TensorRegistries tensor_regs{lowered_graph.backend_contexts(), true}; + lowered_graph.op_seqs().iterate( [&](const ir::OpSequenceIndex &op_seq_index, const ir::OpSequence &op_seq) { auto lower_info = lowered_graph.getLowerInfo(op_seq_index); @@ -219,20 +253,20 @@ void ExecutorFactory::prepareExternalTensors(ir::LoweredGraph &lowered_graph, // If an OpSequence input/output tensor does not have a own tensor object, // it must be using external tensors, so find the tensor from other tensor builders and // set the tensor to this tensor builder if portable - if (!backend_ctx->tensor_builder->tensorAt(ind)) + if (!backend_ctx->tensor_registry->getITensor(ind)) { - auto tensor = tensor_builders.getITensor(ind); - assert(tensor); // The tensor must have been created in one of TensorBuilders + auto tensor = tensor_regs.getITensor(ind); + assert(tensor); // The tensor must have been registered auto ptensor = std::dynamic_pointer_cast<backend::IPortableTensor>(tensor); if (ptensor) - backend_ctx->tensor_builder->setMigrantTensor(ind, ptensor); + backend_ctx->tensor_registry->setMigrantTensor(ind, ptensor); } } }); } exec::IExecutor * -ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph, +ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options, const std::shared_ptr<exec::ExecutorMap> &executor_map) { @@ -277,13 +311,14 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_ 
 exec::IExecutor *
-ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                       const compiler::CompilerOptions &options,
                                       const std::shared_ptr<exec::ExecutorMap> &executor_map)
 {
@@ -277,13 +311,14 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
   Linear::planTensors(*lowered_graph, order);

   TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};

   for (auto &tensor_builder : tensor_builders)
   {
     tensor_builder->prepare();
   }

-  prepareExternalTensors(*lowered_graph, tensor_builders);
+  prepareExternalTensors(*lowered_graph);

   ExecutionBuilder builder;
@@ -296,7 +331,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
     auto cf_kernel_gen = dynamic_cast<backend::controlflow::KernelGenerator *>(kernel_gen.get());
     if (cf_kernel_gen != nullptr)
     {
-      cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+      cf_kernel_gen->setTensorRegistries(tensor_regs);
       cf_kernel_gen->setExecutorMap(executor_map);
     }
     auto fn_seq = kernel_gen->generate(op_seq);
@@ -335,9 +370,10 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
     });
   }

-  auto exec =
-      new exec::LinearExecutor{std::move(lowered_graph), input_tensors, output_tensors,
-                               tensor_builders, std::move(code_map), order};
+  backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+  auto exec = new exec::LinearExecutor{
+      std::move(lowered_graph), input_tensors, output_tensors, tensor_regs,
+      std::move(tensor_mgrs),   std::move(code_map),           order};

   if (!options.trace_filepath.empty())
   {
@@ -350,7 +386,7 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_
 }

 exec::IExecutor *ExecutorFactory::createDataflowExecutor(
-    std::unique_ptr<ir::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
+    std::unique_ptr<compiler::LoweredGraph> lowered_graph, const compiler::CompilerOptions &options,
     const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel)
 {
   const auto &backend_contexts = lowered_graph->backend_contexts();
@@ -369,6 +405,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
   }

   TensorBuilders tensor_builders{lowered_graph->backend_contexts(), true};
+  TensorRegistries tensor_regs{lowered_graph->backend_contexts(), true};

   // To make tensors never be deallocated, this is a workaround to use static memory planner
   for (auto &tensor_builder : tensor_builders)
@@ -387,7 +424,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
     tensor_builder->prepare();
   }

-  prepareExternalTensors(*lowered_graph, tensor_builders);
+  prepareExternalTensors(*lowered_graph);

   ExecutionBuilder builder;
@@ -401,7 +438,7 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
     if (cf_kernel_gen != nullptr)
     {
       assert(cf_kernel_gen != nullptr);
-      cf_kernel_gen->setTensorBuilderSet(tensor_builders);
+      cf_kernel_gen->setTensorRegistries(tensor_regs);
       cf_kernel_gen->setExecutorMap(executor_map);
     }
     auto fn_seq = kernel_gen->generate(op_seq);
@@ -440,17 +477,20 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
     });
   }

+  backend::TensorManagerSet tensor_mgrs = createTensorManagerSet(tensor_builders);
+
   exec::ExecutorBase *exec = nullptr;
   if (parallel)
   {
-    exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors, output_tensors,
-                                      tensor_builders, std::move(code_map)};
+    exec = new exec::ParallelExecutor{std::move(lowered_graph), input_tensors,
                                       output_tensors, tensor_regs,
+                                      std::move(tensor_mgrs), std::move(code_map)};
   }
   else
   {
-    auto dataflow_exec =
-        new exec::DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors,
-                                   tensor_builders, std::move(code_map)};
+    auto dataflow_exec = new exec::DataflowExecutor{std::move(lowered_graph), input_tensors,
+                                                    output_tensors, tensor_regs,
+                                                    std::move(tensor_mgrs), std::move(code_map)};
     if (options.he_profiling_mode)
     {
       std::vector<const backend::Backend *> backends;
diff --git a/runtime/onert/core/src/compiler/ExecutorFactory.h b/runtime/onert/core/src/compiler/ExecutorFactory.h
index 418e5a764..b8893c03b 100644
--- a/runtime/onert/core/src/compiler/ExecutorFactory.h
+++ b/runtime/onert/core/src/compiler/ExecutorFactory.h
@@ -21,8 +21,8 @@
 #include "backend/ITensor.h"
 #include "exec/IExecutor.h"
-#include "ir/LoweredGraph.h"
-#include "TensorBuilders.h"
+#include "compiler/LoweredGraph.h"
+#include "TensorRegistries.h"

 namespace onert
 {
@@ -35,7 +35,7 @@ public:
   static ExecutorFactory &get();

 public:
-  exec::IExecutor *create(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  exec::IExecutor *create(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                           const compiler::CompilerOptions &options,
                           const std::shared_ptr<exec::ExecutorMap> &executor_map);

@@ -43,28 +43,27 @@ private:
   ExecutorFactory();

 private:
-  static void initializeBackendContext(ir::LoweredGraph *lowered_graph);
-  static void runTensorRegistration(ir::LoweredGraph *lowered_graph,
+  static void initializeBackendContext(compiler::LoweredGraph *lowered_graph);
+  static void runTensorRegistration(compiler::LoweredGraph *lowered_graph,
                                     const std::vector<ir::OpSequenceIndex> &order);
   static std::vector<std::shared_ptr<backend::ITensor>>
-  initializeModelIOTensors(ir::LoweredGraph &lowered_graph,
+  initializeModelIOTensors(compiler::LoweredGraph &lowered_graph,
                            const ir::OperandIndexSequence &indices);
-  static void prepareExternalTensors(ir::LoweredGraph &lowered_graph,
-                                     TensorBuilders &tensor_builders);
+  static void prepareExternalTensors(compiler::LoweredGraph &lowered_graph);
   static exec::IExecutor *
-  createLinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                        const compiler::CompilerOptions &options,
                        const std::shared_ptr<exec::ExecutorMap> &executor_map);
   static exec::IExecutor *
-  createDataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph,
+  createDataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                          const compiler::CompilerOptions &options,
                          const std::shared_ptr<exec::ExecutorMap> &executor_map, bool parallel);

 private:
-  std::unordered_map<
-      std::string, std::function<exec::IExecutor *(
-                       std::unique_ptr<ir::LoweredGraph>, const compiler::CompilerOptions &options,
-                       const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
+  std::unordered_map<std::string, std::function<exec::IExecutor *(
+                                      std::unique_ptr<compiler::LoweredGraph>,
+                                      const compiler::CompilerOptions &options,
+                                      const std::shared_ptr<exec::ExecutorMap> &executor_map)>>
       _map;
 };
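`_map` keys executor-factory callbacks by the executor name carried in CompilerOptions, which is how `create()` dispatches in the .cc hunk above. A sketch of how the constructor plausibly populates it, inferred from the `std::placeholders::_3, true` fragment visible earlier; the string keys other than "Linear" and the exact bound names are assumptions:

    // Hypothetical registration sketch; `parallel` is the trailing bool
    // argument of createDataflowExecutor.
    _map["Linear"] = createLinearExecutor;
    _map["Dataflow"] = std::bind(createDataflowExecutor, std::placeholders::_1,
                                 std::placeholders::_2, std::placeholders::_3, false);
    _map["Parallel"] = std::bind(createDataflowExecutor, std::placeholders::_1,
                                 std::placeholders::_2, std::placeholders::_3, true);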
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
index 5c4b84ec0..23a6a253d 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.cc
@@ -44,7 +44,7 @@ namespace onert
 namespace compiler
 {

-Fp32ToFp16Converter::Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph)
+Fp32ToFp16Converter::Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph)
     : _lowered_graph{lowered_graph}
 {
   VERBOSE(Fp32ToFp16Converter) << "Fp16 Enable on" << std::endl;
diff --git a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
index 5dbf74472..eeecb9846 100644
--- a/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
+++ b/runtime/onert/core/src/compiler/Fp32ToFp16Converter.h
@@ -17,7 +17,7 @@
 #ifndef __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__
 #define __ONERT_COMPILER_FP32_TO_FP16_CONVERTER_H__

-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"

 namespace onert
 {
@@ -28,7 +28,7 @@ namespace compiler
 class Fp32ToFp16Converter
 {
 public:
-  Fp32ToFp16Converter(ir::LoweredGraph &lowered_graph);
+  Fp32ToFp16Converter(compiler::LoweredGraph &lowered_graph);

 public:
   void run();
@@ -89,7 +89,7 @@ private:
   void convertOperandsOfOpSequence(ir::OpSequence &op_seq);

 private:
-  ir::LoweredGraph &_lowered_graph;
+  compiler::LoweredGraph &_lowered_graph;
   OpSeqIndexList _list_fp32_to_fp16;
   OpSeqIndexList _list_fp16_to_fp32;
 };
diff --git a/runtime/onert/core/src/compiler/HEScheduler.cc b/runtime/onert/core/src/compiler/HEScheduler.cc
index de9b4fbd0..5653b090e 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.cc
+++ b/runtime/onert/core/src/compiler/HEScheduler.cc
@@ -54,42 +54,10 @@ static bool isQuant(const ir::Graph &graph, const ir::Operation &node)
   return false;
 }

-static bool isWorkaroundSkip(const ir::Graph &graph, const backend::Backend *backend,
-                             const ir::Operation &node, bool quant)
+static bool isWorkaroundSkip(const ir::Graph &, const backend::Backend *, const ir::Operation &,
+                             bool)
 {
-  /* TODO: this is workaround, come up with better solution if have.
-           Adding exception in stage doesn't help. Because if there is a record for add without
-           broadcast, scheduling will select it since it doesn't distinguish broadcast and
-           non-broadcast like it does for quant non-quantized*/
-  if (backend->config()->id() == "cpu" &&
-      (node.opcode() == ir::OpCode::Add || node.opcode() == ir::OpCode::Sub ||
-       node.opcode() == ir::OpCode::Mul))
-  {
-    const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
-    const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-    /*Broadcasting isn't supported on CPU: no way to differ the existing exec_time record with and
-     * without broadcasting*/
-    if (!(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
-    {
-      return true;
-    }
-  }
-  /* TODO: this is workaround, come up with better solution if have.
-           Adding exception in stage doesn't help. Because if there is a record for Mul without
-           broadcast, scheduling will select it since it doesn't distinguish broadcast and
-           non-broadcast like it does for quant non-quantized*/
-  else if (backend->config()->id() == "acl_neon" && node.opcode() == ir::OpCode::Mul)
-  {
-    const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
-    const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
-    // Nontrivial broadcasting isn't supported yet
-    if (quant ||
-        !(graph.operands().at(lhs_index).shape() == graph.operands().at(rhs_index).shape()))
-    {
-      return true;
-    }
-  }
+  // Now there is no workaround
   return false;
 }
diff --git a/runtime/onert/core/src/compiler/HEScheduler.h b/runtime/onert/core/src/compiler/HEScheduler.h
index d8ceca9c8..b9cee5881 100644
--- a/runtime/onert/core/src/compiler/HEScheduler.h
+++ b/runtime/onert/core/src/compiler/HEScheduler.h
@@ -59,6 +59,8 @@ public:
   {
     for (auto &entry : backend_contexts)
     {
+      if (entry.first->config()->id() == backend::controlflow::Config::ID)
+        continue;
       _all_backends.push_back(entry.first);
     }
     _backend_resolver = std::make_unique<compiler::BackendResolver>();
diff --git a/runtime/onert/core/src/compiler/Linear.cc b/runtime/onert/core/src/compiler/Linear.cc
index 493ca1e43..49a989500 100644
--- a/runtime/onert/core/src/compiler/Linear.cc
+++ b/runtime/onert/core/src/compiler/Linear.cc
@@ -29,7 +29,7 @@ namespace onert
 namespace compiler
 {

-std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lowered_graph)
+std::vector<ir::OpSequenceIndex> Linear::linearize(const compiler::LoweredGraph &lowered_graph)
 {
   std::vector<ir::OpSequenceIndex> order;
   lowered_graph.iterateTopolOpSeqs(
@@ -39,7 +39,7 @@ std::vector<ir::OpSequenceIndex> Linear::linearize(const ir::LoweredGraph &lower
   return order;
 }

-void Linear::dump(const ir::LoweredGraph &lowered_graph,
+void Linear::dump(const compiler::LoweredGraph &lowered_graph,
                   const std::vector<ir::OpSequenceIndex> &order)
 {
   {
@@ -62,7 +62,7 @@ void Linear::dump(const ir::LoweredGraph &lowered_graph,
   }
 }

-void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
+void Linear::planTensors(const compiler::LoweredGraph &lowered_graph,
                          const std::vector<ir::OpSequenceIndex> &order)
 {
   const auto &graph = lowered_graph.graph();
@@ -180,11 +180,9 @@ void Linear::planTensors(const ir::LoweredGraph &lowered_graph,
         tensor_builder_map[ind]->notifyLastUse(ind);

         // plan for deallocation of dynamic tensor
-        if (tensor_builder_map[ind]->supportDynamicTensor())
-        {
-          assert(tensor_builder_map[ind]->dynamicTensorManager());
-          tensor_builder_map[ind]->dynamicTensorManager()->planDealloc(op_idx, ind);
-        }
+        auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager();
+        if (dyn_tensor_manager)
+          dyn_tensor_manager->planDealloc(op_idx, ind);
       }
     }
   }
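The deallocation planning above now leans on a single nullable accessor instead of a separate capability flag; assuming builders without dynamic-tensor support return a null manager, the check collapses to the usual idiom:

    // Sketch of the idiom used in the hunk above.
    if (auto dyn_tensor_manager = tensor_builder_map[ind]->dynamicTensorManager())
      dyn_tensor_manager->planDealloc(op_idx, ind); // silently skipped when unsupported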
diff --git a/runtime/onert/core/src/compiler/Linear.h b/runtime/onert/core/src/compiler/Linear.h
index faeff77f3..1e24cf92b 100644
--- a/runtime/onert/core/src/compiler/Linear.h
+++ b/runtime/onert/core/src/compiler/Linear.h
@@ -23,7 +23,7 @@
 #include "ir/OpSequences.h"
 #include "ir/Index.h"
 #include "backend/ITensorBuilder.h"
-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"

 namespace onert
 {
@@ -41,10 +41,10 @@ namespace compiler
 class Linear
 {
 public:
-  static std::vector<ir::OpSequenceIndex> linearize(const ir::LoweredGraph &lowered_graph);
-  static void dump(const ir::LoweredGraph &lowered_graph,
+  static std::vector<ir::OpSequenceIndex> linearize(const compiler::LoweredGraph &lowered_graph);
+  static void dump(const compiler::LoweredGraph &lowered_graph,
                    const std::vector<ir::OpSequenceIndex> &order);
-  static void planTensors(const ir::LoweredGraph &lowered_graph,
+  static void planTensors(const compiler::LoweredGraph &lowered_graph,
                           const std::vector<ir::OpSequenceIndex> &order);
 };
diff --git a/runtime/onert/core/src/ir/LoweredGraph.cc b/runtime/onert/core/src/compiler/LoweredGraph.cc
index 8aedfbdf0..1489a1884 100644
--- a/runtime/onert/core/src/ir/LoweredGraph.cc
+++ b/runtime/onert/core/src/compiler/LoweredGraph.cc
@@ -14,18 +14,18 @@
  * limitations under the License.
  */

-#include "ir/LoweredGraph.h"
+#include "compiler/LoweredGraph.h"

 #include <assert.h>
 #include <sstream>
 #include "util/logging.h"
-#include "pass/ConstantInsertionPass.h"
-#include "pass/ConstantLoweringPass.h"
-#include "pass/PermutationOperationPass.h"
-#include "pass/PermutationInsertionPass.h"
-#include "pass/PermutationEliminationPass.h"
+#include "compiler/pass/ConstantInsertionPass.h"
+#include "compiler/pass/ConstantLoweringPass.h"
+#include "compiler/pass/PermutationOperationPass.h"
+#include "compiler/pass/PermutationInsertionPass.h"
+#include "compiler/pass/PermutationEliminationPass.h"
 #include "ir/GraphIterator.h"
-#include "verifier/Verifier.h"
+#include "ir/verifier/Verifier.h"
 #include "backend/Backend.h"
 #include "backend/IConfig.h"
 #include "compiler/BackendResolver.h"
@@ -34,16 +34,15 @@
 namespace onert
 {
-namespace ir
+namespace compiler
 {

-LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &options)
-    : _graph{graph}
+LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
 {
   bool linear_executor = (options.executor == "Linear");

   // Build backend contexts
-  auto &backend_manager = compiler::BackendManager::get();
+  auto &backend_manager = BackendManager::get();

   // Always create Controlflow backend context
   auto cf_backend = backend_manager.getControlflow();
@@ -73,36 +72,37 @@ LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &
   // TODO Move "schedule" phase out of here
   // Schedule
-  std::unique_ptr<compiler::BackendResolver> backend_resolver;
+  std::unique_ptr<BackendResolver> backend_resolver;
   if (options.he_scheduler)
   {
-    auto scheduler = compiler::HEScheduler(_backend_contexts, options);
+    auto scheduler = HEScheduler(_backend_contexts, options);
     backend_resolver = scheduler.schedule(_graph);
     _indexed_ranks = scheduler.getIndexedRanks();
   }
   else
   {
-    auto scheduler = compiler::ManualScheduler(_backend_contexts, options);
+    auto scheduler = ManualScheduler(_backend_contexts, options);
     backend_resolver = scheduler.schedule(_graph);
   }

   {
     // operand::LowerInfo holder
-    OperandIndexMap<std::unique_ptr<operand::LowerInfo>> operands_lower_info;
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> operands_lower_info;

-    _graph.operands().iterate([&](const OperandIndex &index, const Operand &) {
-      operands_lower_info[index] = std::make_unique<operand::LowerInfo>();
+    _graph.operands().iterate([&](const ir::OperandIndex &index, const ir::Operand &) {
+      operands_lower_info[index] = std::make_unique<ir::operand::LowerInfo>();
     });

     // Make op_seqs while checking whether a node can be merged into an op_seq.
     makeOpSequences(operands_lower_info, options, *backend_resolver);

-    _op_seqs.iterate([&](const OpSequenceIndex &, OpSequence &op_seq) {
+    _op_seqs.iterate([&](const ir::OpSequenceIndex &, ir::OpSequence &op_seq) {
       assert(op_seq.operations().size() > 0);
       std::reverse(std::begin(op_seq.operations()), std::end(op_seq.operations()));
     });

-    _op_seqs.dump("merged and sorted operations without permutation", _graph.operations());
+    VERBOSE(OpSequences) << "dump without permutation" << std::endl;
+    dumpOpSequences(_op_seqs, _graph.operations());

     pass::ConstantInsertionPass ci_pass(*this);
     ci_pass.run();
@@ -127,17 +127,19 @@ LoweredGraph::LoweredGraph(const Graph &graph, const compiler::CompilerOptions &
     pass::PermutationEliminationPass pe_pass(*this);
     pe_pass.run();

-    _op_seqs.dump("merged and sorted operations with permutation", _graph.operations());
+    VERBOSE(OpSequences) << "dump with permutation" << std::endl;
+    dumpOpSequences(_op_seqs, _graph.operations());
   }

   // Graph verifications
   {
-    assert(verifier::DAGChecker().verify(_graph));
-    assert(verifier::EdgeConsistencyChecker().verify(_graph));
+    assert(ir::verifier::DAGChecker().verify(_graph));
+    assert(ir::verifier::EdgeConsistencyChecker().verify(_graph));
   }
 }

-const operation::LowerInfo *LoweredGraph::getLowerInfo(const OpSequenceIndex &op_seq_index) const
+const ir::operation::LowerInfo *
+LoweredGraph::getLowerInfo(const ir::OpSequenceIndex &op_seq_index) const
 {
   auto itr = _lower_info_map.op_seq.find(op_seq_index);
   if (itr == _lower_info_map.op_seq.end())
@@ -145,13 +147,13 @@
   return itr->second.get();
 }

-void LoweredGraph::setLowerInfo(const OpSequenceIndex &op_seq_index,
-                                std::unique_ptr<operation::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OpSequenceIndex &op_seq_index,
+                                std::unique_ptr<ir::operation::LowerInfo> &&lower_info)
 {
   _lower_info_map.op_seq.insert(std::make_pair(op_seq_index, std::move(lower_info)));
 }

-void LoweredGraph::removeLowerInfo(const OpSequenceIndex &op_seq_index)
+void LoweredGraph::removeLowerInfo(const ir::OpSequenceIndex &op_seq_index)
 {
   auto &op_seq_lower_info = _lower_info_map.op_seq;
   assert(op_seq_lower_info.find(op_seq_index) != op_seq_lower_info.end());
@@ -165,7 +167,7 @@
   }
 }

-const operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index) const
+const ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index) const
 {
   auto itr = _lower_info_map.operand.find(index);
   if (itr == _lower_info_map.operand.end())
@@ -173,7 +175,7 @@
   return itr->second.get();
 }

-operand::LowerInfo *LoweredGraph::getLowerInfo(const OperandIndex &index)
+ir::operand::LowerInfo *LoweredGraph::getLowerInfo(const ir::OperandIndex &index)
 {
   auto itr = _lower_info_map.operand.find(index);
   if (itr == _lower_info_map.operand.end())
@@ -181,25 +183,26 @@
   return itr->second.get();
 }

-void LoweredGraph::setLowerInfo(const OperandIndex &index,
-                                std::unique_ptr<operand::LowerInfo> &&lower_info)
+void LoweredGraph::setLowerInfo(const ir::OperandIndex &index,
+                                std::unique_ptr<ir::operand::LowerInfo> &&lower_info)
 {
   _lower_info_map.operand.insert(std::make_pair(index, std::move(lower_info)));
 }

-void LoweredGraph::removeLowerInfo(const OperandIndex &index)
+void LoweredGraph::removeLowerInfo(const ir::OperandIndex &index)
 {
   _lower_info_map.operand.erase(index);
 }

 void LoweredGraph::iterateTopolOpSeqs(
-    const std::function<void(const OpSequenceIndex &, const OpSequence &)> &fn) const
+    const std::function<void(const ir::OpSequenceIndex &, const ir::OpSequence &)> &fn) const
 {
-  // Topological Sorting for OpSequences
-  std::vector<OpSequenceIndex> topol_sorted;
-  PostDfsIterator<true>{}.iterateOpSeqs(
-      *this,
-      [&](const OpSequenceIndex &index, const OpSequence &) { topol_sorted.emplace_back(index); });
+  // Topological Sorting for ir::OpSequences
+  std::vector<ir::OpSequenceIndex> topol_sorted;
+  ir::PostDfsIterator<true>{}.iterateOpSeqs(
+      *this, [&](const ir::OpSequenceIndex &index, const ir::OpSequence &) {
+        topol_sorted.emplace_back(index);
+      });
   std::reverse(topol_sorted.begin(), topol_sorted.end());
   for (const auto op_seq_idx : topol_sorted)
   {
@@ -209,12 +212,14 @@ void LoweredGraph::iterateTopolOpSeqs(
 }

 void LoweredGraph::iterateTopolOpSeqs(
-    const std::function<void(const OpSequenceIndex &, OpSequence &)> &fn)
+    const std::function<void(const ir::OpSequenceIndex &, ir::OpSequence &)> &fn)
 {
-  // Topological Sorting for OpSequences
-  std::vector<OpSequenceIndex> topol_sorted;
-  PostDfsIterator<false>{}.iterateOpSeqs(
-      *this, [&](const OpSequenceIndex &index, OpSequence &) { topol_sorted.emplace_back(index); });
+  // Topological Sorting for ir::OpSequences
+  std::vector<ir::OpSequenceIndex> topol_sorted;
+  ir::PostDfsIterator<false>{}.iterateOpSeqs(
+      *this, [&](const ir::OpSequenceIndex &index, ir::OpSequence &) {
+        topol_sorted.emplace_back(index);
+      });
   std::reverse(topol_sorted.begin(), topol_sorted.end());
   for (const auto op_seq_idx : topol_sorted)
   {
@@ -223,12 +228,12 @@ void LoweredGraph::iterateTopolOpSeqs(
   }
 }

-OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const OperationIndex &node_index,
-                                                          const Operation &node)
+ir::OpSequenceIndex LoweredGraph::appendFreshSingleOpSequence(const ir::OperationIndex &node_index,
+                                                              const ir::Operation &node)
 {
   // Create a fresh op_seq with one operation, and append it to op_seqs
   // Create a fresh op_seq
-  auto op_seq = std::make_unique<OpSequence>(_graph.layout());
+  auto op_seq = std::make_unique<ir::OpSequence>(_graph.layout());

   // Add an operation
   op_seq->appendOperation(node_index);
@@ -241,21 +246,21 @@
 }

 void LoweredGraph::makeOpSequences(
-    OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info,
-    const compiler::CompilerOptions &options, const compiler::BackendResolver &backend_resolver)
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+    const CompilerOptions &options, const BackendResolver &backend_resolver)
 {
   // if SUBG_MAX_NODE == 0, no limit on nodes of a op_seq
   const int op_seq_max_node = options.op_seq_max_node;
   assert(op_seq_max_node >= 0);

   bool is_profiling = options.he_profiling_mode;
-  OpSequence *op_seq = nullptr;
-  OpSequenceIndex op_seq_index;
+  ir::OpSequence *op_seq = nullptr;
+  ir::OpSequenceIndex op_seq_index;

   // NOTE: The below method appends nodes while making one op_seq if needed. If there are better
   //       ways, happy to update this code.
-  PostDfsConstIterator{}.iterate(
-      _graph, [&](const OperationIndex &node_index, const Operation &node) {
+  ir::PostDfsConstIterator{}.iterate(
+      _graph, [&](const ir::OperationIndex &node_index, const ir::Operation &node) {
        // LowerInfo for in/output operands
        auto backend = backend_resolver.getBackend(node_index);
@@ -269,12 +274,12 @@ void LoweredGraph::makeOpSequences(
        for (auto operand : node.getInputs() | ir::Remove::UNDEFINED)
        {
          auto &&lower_info = operands_lower_info.at(operand);
-         lower_info->addUsePermuteFactor(operand::PermuteFactor{backend, backend_layout});
+         lower_info->addUsePermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
        }
        for (auto operand : node.getOutputs())
        {
          auto &&lower_info = operands_lower_info.at(operand);
-         lower_info->addDefPermuteFactor(operand::PermuteFactor{backend, backend_layout});
+         lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{backend, backend_layout});
        }

        bool new_op_seq = (op_seq == nullptr ||
@@ -288,9 +293,9 @@ void LoweredGraph::makeOpSequences(
        {
          auto new_op_seq_index = appendFreshSingleOpSequence(node_index, node);

-         // OpSequence LowerInfo
+         // ir::OpSequence LowerInfo
          setLowerInfo(new_op_seq_index,
-                      std::make_unique<operation::LowerInfo>(backend, backend_layout));
+                      std::make_unique<ir::operation::LowerInfo>(backend, backend_layout));

          op_seq_index = new_op_seq_index;
          op_seq = &(_op_seqs.at(new_op_seq_index));
@@ -318,16 +323,17 @@
 }

 void LoweredGraph::manipulateLowerInfo(
-    OperandIndexMap<std::unique_ptr<operand::LowerInfo>> &operands_lower_info, bool is_primary)
+    ir::OperandIndexMap<std::unique_ptr<ir::operand::LowerInfo>> &operands_lower_info,
+    bool is_primary)
 {
-  const auto controlflow_backend = compiler::BackendManager::get().getControlflow();
+  const auto controlflow_backend = BackendManager::get().getControlflow();
   // TODO Rather than handling primary graph specially,
   //      let the permute inserted and remove it later
   if (is_primary)
   {
     // TODO Rather than using NHWC Get frontend layout of this node from IR
-    auto factor = operand::PermuteFactor{controlflow_backend, Layout::NHWC};
+    auto factor = ir::operand::PermuteFactor{controlflow_backend, ir::Layout::NHWC};
     for (auto index : _graph.getInputs() | ir::Remove::UNDEFINED)
     {
       auto &&lower_info = operands_lower_info.at(index);
@@ -355,9 +361,9 @@ void LoweredGraph::manipulateLowerInfo(
     else
     {
       // In case of that an operand is Graph's input and not input or output of any operation
-      lower_info->addDefPermuteFactor(operand::PermuteFactor{
+      lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
          controlflow_backend,
-         Layout::NHWC // TODO Get frontend layout of this node from IR
+         ir::Layout::NHWC // TODO Get frontend layout of this node from IR
       });
     }
   }
@@ -368,15 +374,15 @@ void LoweredGraph::manipulateLowerInfo(
     if (lower_info->def_factors().size() == 0)
     {
       // In case of that an operand is Graph's output and not input or output of any operation
-      lower_info->addDefPermuteFactor(operand::PermuteFactor{
+      lower_info->addDefPermuteFactor(ir::operand::PermuteFactor{
          controlflow_backend,
-         Layout::NHWC // TODO Get frontend layout of this node from IR
+         ir::Layout::NHWC // TODO Get frontend layout of this node from IR
       });
     }
   }

   // Set LowerInfo for each operand from the operand::LowerInfo holder
-  _graph.operands().iterate([&](const OperandIndex &index, Operand &) {
+  _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &) {
     setLowerInfo(index, std::move(operands_lower_info[index]));
   });
 }
@@ -388,11 +394,11 @@ void LoweredGraph::dumpLowerInfo()

   std::map<uint32_t, std::string> dumps;

-  _graph.operands().iterate([&](const OperandIndex &index, Operand &object) {
+  _graph.operands().iterate([&](const ir::OperandIndex &index, ir::Operand &object) {
     std::stringstream sstream;
     if (!getLowerInfo(index)->def_factors().empty() || !getLowerInfo(index)->use_factors().empty())
     {
-      auto factors_to_string = [](const operand::PermuteFactorSet &factors) {
+      auto factors_to_string = [](const ir::operand::PermuteFactorSet &factors) {
         std::string str;
         for (auto factor : factors)
         {
@@ -403,7 +409,7 @@ void LoweredGraph::dumpLowerInfo()
         return "{ " + str + "}";
       };

-      auto operation_index_to_string = [](const OperationIndexSet &operations) {
+      auto operation_index_to_string = [](const ir::OperationIndexSet &operations) {
         std::string str;
         for (auto op : operations)
         {
@@ -427,8 +433,8 @@ void LoweredGraph::dumpLowerInfo()
         sstream << (shape.dim(i)) << " ";
       }
       sstream << "}" << std::endl;
-      sstream << "  - Def Operations  : " << def_ops << std::endl;
-      sstream << "  - Use Operations  : " << use_ops << std::endl;
+      sstream << "  - Def ir::Operations  : " << def_ops << std::endl;
+      sstream << "  - Use ir::Operations  : " << use_ops << std::endl;
       sstream << "  - Lower Info" << std::endl;
      sstream << "    - Def Backends    : " << def_layouts << std::endl;
      sstream << "    - Use Backends    : " << use_layouts << std::endl;
@@ -445,8 +451,9 @@ void LoweredGraph::dumpLowerInfo()
   }
 }

-bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const OperationIndex &node_index,
-                             Layout layout, const compiler::BackendResolver &backend_resolver)
+bool LoweredGraph::mergeable(const ir::OpSequenceIndex &op_seq_index,
+                             const ir::OperationIndex &node_index, ir::Layout layout,
+                             const BackendResolver &backend_resolver)
 {
   // Are they mergeable?
   // 1. the same backend id and layout?
@@ -470,10 +477,10 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
   // Branched?
   {
-    std::unordered_set<OperationIndex> branched_set;
+    std::unordered_set<ir::OperationIndex> branched_set;

     // Check for branching up
-    for (const auto &input : op_seq.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED)
+    for (const auto &input : op_seq.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED)
     {
       const auto &input_obj = _graph.operands().at(input);
       auto def = input_obj.getDef();
@@ -489,7 +496,7 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
     branched_set.clear();

     // Check for branching down
-    for (const auto &output : node.getOutputs() | Remove::DUPLICATED)
+    for (const auto &output : node.getOutputs() | ir::Remove::DUPLICATED)
     {
       // TODO Fix this workaround for the case of model outputs that are used by another operation
       //      This is needed since the branching is decided by operation, but for model outputs,
@@ -516,7 +523,7 @@ bool LoweredGraph::mergeable(const OpSequenceIndex &op_seq_index, const Operatio
     const auto &node_outputs = node.getOutputs();

     // op_seq's operations are in order so that we just check the first and the last
-    std::vector<OperationIndex> op_seq_ops{op_seq.operations()[0]};
+    std::vector<ir::OperationIndex> op_seq_ops{op_seq.operations()[0]};
     if (op_seq.operations().size() > 1)
       op_seq_ops.emplace_back(op_seq.operations()[op_seq.operations().size() - 1]);
@@ -556,5 +563,5 @@
   return false;
 }

-} // namespace ir
+} // namespace compiler
 } // namespace onert
diff --git a/runtime/onert/core/src/compiler/ManualScheduler.cc b/runtime/onert/core/src/compiler/ManualScheduler.cc
index 1d591ae3c..ed49ee56f 100644
--- a/runtime/onert/core/src/compiler/ManualScheduler.cc
+++ b/runtime/onert/core/src/compiler/ManualScheduler.cc
@@ -40,7 +40,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
   const auto &manual_options = _options.manual_scheduler_options;
   auto backend_resolver = std::make_unique<compiler::BackendResolver>();

-  // This fallback will be used for unavailable backends
+  // This fallback will be used when `backend_for_all` is unavailable
   auto fallback = [&]() -> const backend::Backend * {
     for (auto backend_id : _options.backend_list)
     {
@@ -50,7 +50,8 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
     }
     return nullptr;
   }();
-  assert(fallback != nullptr); // There must be at least one fallback
+  if (fallback == nullptr)
+    throw std::runtime_error{"No loaded backends available."};

   // 1. Backend for All operations
   const backend::Backend *backend_all = resolveBackend(manual_options.backend_for_all, fallback);
@@ -110,7 +111,7 @@ std::unique_ptr<BackendResolver> ManualScheduler::schedule(const ir::Graph &grap
 const backend::Backend *ManualScheduler::resolveBackend(const std::string &id,
                                                         const backend::Backend *fallback)
 {
-  // Ensure if the backend is available in the backend
+  // Ensure that the backend is available in the current backend context
   const backend::Backend *backend = BackendManager::get().get(id);
   if (!backend || _backend_contexts.find(backend) == _backend_contexts.end())
   {
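schedule() above now fails fast with a runtime_error instead of asserting when none of the configured backends could be loaded. Condensed, the fallback selection is an immediately-invoked lambda returning the first loadable entry of backend_list; the loop body is elided in the patch, so the lookup shown inside it is an assumption:

    auto fallback = [&]() -> const backend::Backend * {
      for (auto backend_id : _options.backend_list)
      {
        if (const auto *backend = BackendManager::get().get(backend_id)) // assumed lookup
          return backend;
      }
      return nullptr; // nothing loadable
    }();
    if (fallback == nullptr)
      throw std::runtime_error{"No loaded backends available."};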
diff --git a/runtime/onert/core/src/compiler/OperationValidator.cc b/runtime/onert/core/src/compiler/OperationValidator.cc
index 44496318f..f7f659e3e 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.cc
+++ b/runtime/onert/core/src/compiler/OperationValidator.cc
@@ -68,19 +68,6 @@
       [&](const ir::OperationIndex &, const ir::Operation &node) { node.accept(*this); });
 }

-void OperationValidator::visit(const ir::operation::Abs &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::AvgPool2D &node)
-{
-  const auto ofm_index{node.getOutputs().at(0)};
-  if (_ctx.at(ofm_index).info().isDynamic())
-    return;
-
-  const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
-  OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
-}
-
 void OperationValidator::visit(const ir::operation::BatchMatMul &node)
 {
   const auto lhs_index(node.getInputs().at(ir::operation::BatchMatMul::Input::LHS));
@@ -125,17 +112,6 @@ void OperationValidator::visit(const ir::operation::BatchToSpaceND &node)
   OP_REQUIRES(input_shape.C == output_shape.C);
 }

-void OperationValidator::visit(const ir::operation::Cast &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  if (_ctx.at(output_index).info().isDynamic())
-    return;
-
-  const auto input_index{node.getInputs().at(0)};
-
-  OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
-}
-
 void OperationValidator::visit(const ir::operation::Comparison &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -177,6 +153,17 @@ void OperationValidator::visit(const ir::operation::InstanceNorm &node)
   OP_REQUIRES(_ctx.at(beta_index).shape().rank() == 1);
 }

+void OperationValidator::visit(const ir::operation::Pool2D &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  if (_ctx.at(ofm_index).info().isDynamic())
+    return;
+
+  const auto ifm_index{node.getInputs().at(ir::operation::Pool2D::Input::INPUT)};
+
+  OP_REQUIRES(_ctx.at(ifm_index).shape().rank() == 4);
+}
+
 void OperationValidator::visit(const ir::operation::Permute &node)
 {
   VERBOSE(Permute) << "Configure Permute operation" << std::endl;
@@ -298,8 +285,6 @@ void OperationValidator::visit(const ir::operation::RNN &node)
               num_units == _ctx.at(hidden_state_out_index).shape().dim(1));
 }

-void OperationValidator::visit(const ir::operation::Round &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::SpaceToBatchND &node)
 {
   const auto ofm_index{node.getOutputs().at(0)};
@@ -353,6 +338,51 @@ void OperationValidator::visit(const ir::operation::SpaceToDepth &node)
   OP_REQUIRES(input_shape.C * block_size * block_size == output_shape.C);
 }

+void OperationValidator::visit(const ir::operation::ElementwiseActivation &node)
+{
+  checkUnaryOp(node);
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseBinary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)};
+
+  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
+  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
+}
+
+void OperationValidator::visit(const ir::operation::ElementwiseUnary &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)};
+
+  OP_REQUIRES(node.getInputs().size() == 1);
+  OP_REQUIRES(node.getOutputs().size() == 1);
+
+  // Check if I/O types match
+  if (node.param().op_type == ir::operation::ElementwiseUnary::Type::DEQUANTIZE)
+  {
+    OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+    OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
+  }
+  else if (node.param().op_type == ir::operation::ElementwiseUnary::Type::QUANTIZE)
+  {
+    OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
+    OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
+  }
+  else if (node.param().op_type != ir::operation::ElementwiseUnary::Type::CAST)
+  {
+    OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == _ctx.at(input_index).typeInfo().type());
+  }
+
+  if (_ctx.at(output_index).info().isDynamic())
+    return;
+
+  OP_REQUIRES(_ctx.at(output_index).shape() == _ctx.at(input_index).shape());
+}
+
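The merged ElementwiseUnary validator above folds the old Cast, Dequantize and Quantize checks into one visitor. Its I/O type contract, written out as a standalone predicate for reference (a sketch; only the rules visible in the hunk are encoded):

    using Type = ir::operation::ElementwiseUnary::Type;
    bool ioTypesConsistent(Type op_type, ir::DataType in, ir::DataType out)
    {
      switch (op_type)
      {
        case Type::DEQUANTIZE: // quantized in, float out
          return in == ir::DataType::QUANT_UINT8_ASYMM && out == ir::DataType::FLOAT32;
        case Type::QUANTIZE: // float in, quantized out
          return in == ir::DataType::FLOAT32 && out == ir::DataType::QUANT_UINT8_ASYMM;
        case Type::CAST: // cast may change the type freely
          return true;
        default: // every other unary op must preserve the type
          return in == out;
      }
    }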
 void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -389,8 +419,6 @@ void OperationValidator::visit(const ir::operation::EmbeddingLookup &node)
   }
 }

-void OperationValidator::visit(const ir::operation::Exp &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::ExpandDims &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -405,8 +433,6 @@ void OperationValidator::visit(const ir::operation::ExpandDims &node)
   OP_REQUIRES(_ctx.at(axis_index).shape().rank() <= 1);
 }

-void OperationValidator::visit(const ir::operation::Floor &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::HashtableLookup &node)
 {
   const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
@@ -495,21 +521,6 @@ void OperationValidator::visit(const ir::operation::Gather &node)
   OP_REQUIRES(ofm_shape.rank() <= 4);
 }

-void OperationValidator::visit(const ir::operation::Dequantize &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-
-  const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
-  OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-  OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::FLOAT32);
-
-  if (_ctx.at(output_index).info().isDynamic())
-    return;
-  OP_REQUIRES(_ctx.at(input_index).shape().rank() <= 4);
-  OP_REQUIRES(_ctx.at(input_index).shape() == _ctx.at(output_index).shape());
-}
-
 void OperationValidator::visit(const ir::operation::DepthToSpace &node)
 {
   // param check
@@ -822,30 +833,6 @@ void OperationValidator::visit(const ir::operation::Pad &node)
   OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
 }

-void OperationValidator::visit(const ir::operation::Min &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  // This validator does not check shape. So checking isDynamic() is skipped.
-
-  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
-void OperationValidator::visit(const ir::operation::Max &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  // This validator does not check shape. So checking isDynamic() is skipped.
-
-  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
-  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
 void OperationValidator::visit(const ir::operation::Select &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -899,12 +886,6 @@ void OperationValidator::visit(const ir::operation::Split &node)
   OP_REQUIRES(_ctx.at(input_index).shape().dim(axis) % num_splits == 0);
 }

-void OperationValidator::visit(const ir::operation::Cos &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Sin &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::RSQRT &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::Shape &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -961,12 +942,6 @@ void OperationValidator::visit(const ir::operation::While &node)
   // TODO Add to validate with subgraphs
 }

-void OperationValidator::visit(const ir::operation::Neg &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::Log &node) { checkUnaryOp(node); }
-
-void OperationValidator::visit(const ir::operation::LogicalNot &node) { checkUnaryOp(node); }
-
 void OperationValidator::visit(const ir::operation::SquaredDifference &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -1027,16 +1002,6 @@ void OperationValidator::visit(const ir::operation::Tile &node)
   OP_REQUIRES(_ctx.at(input_index).shape().rank() == _ctx.at(output_index).shape().rank());
 }

-void OperationValidator::visit(const ir::operation::LogicalOr &node)
-{
-  const auto output_index{node.getOutputs().at(0)};
-  const auto lhs_index{node.getInputs().at(0)};
-  const auto rhs_index{node.getInputs().at(1)};
-
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(rhs_index).typeInfo().type());
-  OP_REQUIRES(_ctx.at(lhs_index).typeInfo().type() == _ctx.at(output_index).typeInfo().type());
-}
-
 void OperationValidator::visit(const ir::operation::Range &node)
 {
   const auto output_index{node.getOutputs().at(0)};
@@ -1084,24 +1049,5 @@ void OperationValidator::visit(const ir::operation::LogSoftmax &node)
   OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
 }

-void OperationValidator::visit(const ir::operation::Quantize &node)
-{
-  VERBOSE(Quantize) << "Configure Quantize operation" << std::endl;
-
-  OP_REQUIRES(node.getInputs().size() == 1);
-  OP_REQUIRES(node.getOutputs().size() == 1);
-
-  const auto input_index{node.getInputs().at(0)};
-  const auto output_index{node.getOutputs().at(0)};
-
-  OP_REQUIRES(_ctx.at(input_index).typeInfo().type() == ir::DataType::FLOAT32);
-
-  if (_ctx.at(output_index).info().isDynamic())
-    return;
-
-  OP_REQUIRES(_ctx.at(output_index).typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM);
-
-  OP_REQUIRES(_ctx.at(output_index).shape().rank() == _ctx.at(input_index).shape().rank());
-}
 } // namespace compiler
 } // namespace onert
diff --git a/runtime/onert/core/src/compiler/OperationValidator.h b/runtime/onert/core/src/compiler/OperationValidator.h
index b27e6863c..deb6357bb 100644
--- a/runtime/onert/core/src/compiler/OperationValidator.h
+++ b/runtime/onert/core/src/compiler/OperationValidator.h
@@ -44,58 +44,45 @@ public:
   void operator()();

 public:
-  void visit(const ir::operation::Abs &node) override;
-  void visit(const ir::operation::AvgPool2D &node) override;
   void visit(const ir::operation::BatchMatMul &node) override;
   void visit(const ir::operation::BatchToSpaceND &node) override;
-  void visit(const ir::operation::Cast &node) override;
   void visit(const ir::operation::Comparison &node) override;
   void visit(const ir::operation::Softmax &node) override;
   void visit(const ir::operation::InstanceNorm &node) override;
   void visit(const ir::operation::Permute &node) override;
+  void visit(const ir::operation::Pool2D &node) override;
   void visit(const ir::operation::Reduce &node) override;
   void visit(const ir::operation::Transpose &node) override;
   void visit(const ir::operation::RNN &node) override;
-  void visit(const ir::operation::Round &node) override;
   void visit(const ir::operation::SpaceToBatchND &node) override;
   void visit(const ir::operation::SpaceToDepth &node) override;
+  void visit(const ir::operation::ElementwiseActivation &node) override;
+  void visit(const ir::operation::ElementwiseBinary &node) override;
+  void visit(const ir::operation::ElementwiseUnary &node) override;
   void visit(const ir::operation::EmbeddingLookup &node) override;
-  void visit(const ir::operation::Exp &node) override;
   void visit(const ir::operation::ExpandDims &node) override;
-  void visit(const ir::operation::Floor &node) override;
   void visit(const ir::operation::HashtableLookup &node) override;
   void visit(const ir::operation::TransposeConv &node) override;
   void visit(const ir::operation::Gather &node) override;
-  void visit(const ir::operation::Dequantize &node) override;
   void visit(const ir::operation::DepthToSpace &node) override;
   void visit(const ir::operation::Pack &node) override;
   void visit(const ir::operation::LSTM &node) override;
   void visit(const ir::operation::L2Normalization &node) override;
   void visit(const ir::operation::Unpack &node) override;
   void visit(const ir::operation::Pad &node) override;
-  void visit(const ir::operation::Min &node) override;
-  void visit(const ir::operation::Max &node) override;
   void visit(const ir::operation::Select &node) override;
   void visit(const ir::operation::StridedSlice &node) override;
   void visit(const ir::operation::Split &node) override;
-  void visit(const ir::operation::Cos &node) override;
-  void visit(const ir::operation::Sin &node) override;
-  void visit(const ir::operation::RSQRT &node) override;
   void visit(const ir::operation::Shape &node) override;
   void visit(const ir::operation::ResizeBilinear &node) override;
   void visit(const ir::operation::Reverse &node) override;
   void visit(const ir::operation::If &node) override;
   void visit(const ir::operation::While &node) override;
-  void visit(const ir::operation::Neg &node) override;
-  void visit(const ir::operation::Log &node) override;
-  void visit(const ir::operation::LogicalNot &node) override;
   void visit(const ir::operation::SquaredDifference &node) override;
   void visit(const ir::operation::Tile &node) override;
-  void visit(const ir::operation::LogicalOr &node) override;
   void visit(const ir::operation::Range &node) override;
   void visit(const ir::operation::MatrixBandPart &node) override;
   void visit(const ir::operation::LogSoftmax &node) override;
-  void visit(const ir::operation::Quantize &node) override;

 private:
   void checkUnaryOp(const ir::Operation &node);
diff --git a/runtime/onert/core/src/compiler/StaticShapeInference.cc b/runtime/onert/core/src/compiler/StaticShapeInference.cc
index 76c1edcbc..4eba1ff49 100644
--- a/runtime/onert/core/src/compiler/StaticShapeInference.cc
+++ b/runtime/onert/core/src/compiler/StaticShapeInference.cc
@@ -25,6 +25,64 @@ namespace onert
 namespace compiler
 {

+bool StaticShapeInferer::infer(const ir::OpSequence &op_seq)
+{
+  bool has_dynamic_tensor = false;
+
+  for (const auto &operation_idx : op_seq.operations())
+  {
+    auto &op = _operations.at(operation_idx);
+    auto opcode = op.opcode();
+
+    _return_has_dynamic_tensor = false; // this is used as a return value inside operation's visit()
+
+    // IF: need shape inference for then, else
+    // While: need shape inference for condition, body
+    if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
+    {
+      op.accept(*this);
+    }
+    else
+    {
+      _return_has_dynamic_tensor = checkDynamicInput(op);
+
+      if (_return_has_dynamic_tensor)
+      {
+        setDynamicOutput(op);
+      }
+      else
+      {
+        op.accept(*this);
+      }
+    }
+
+    has_dynamic_tensor = has_dynamic_tensor || _return_has_dynamic_tensor;
+  }
+
+  return has_dynamic_tensor;
+}
+
+bool StaticShapeInferer::checkDynamicInput(const ir::Operation &op)
+{
+  for (auto input_idx : op.getInputs() | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED)
+  {
+    if (_operands.at(input_idx).info().isDynamic())
+    {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void StaticShapeInferer::setDynamicOutput(const ir::Operation &op)
+{
+  for (auto output_idx : op.getOutputs())
+  {
+    _operands.at(output_idx).info().setDynamic();
+  }
+}
+
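infer() above hoists the dynamic-shape test out of the individual visit() methods, which is why the per-operation `isDynamic()` early-returns are deleted throughout the remainder of this file. The per-operation dispatch reduces to:

    // Sketch of the dispatch added above (If/While recurse into their
    // subgraphs, so they always get a full visit).
    if (opcode == ir::OpCode::If || opcode == ir::OpCode::While)
      op.accept(*this);
    else if (checkDynamicInput(op))
      setDynamicOutput(op); // dynamic input => dynamic outputs, no inference run
    else
      op.accept(*this);     // static path: run the op's shape inference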
 void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
                                                   const ir::OperandIndex lhs_idx,
                                                   const ir::OperandIndex rhs_idx)
@@ -35,13 +93,6 @@ void StaticShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  if (lhs.info().isDynamic() || rhs.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs.info().shape(), rhs.info().shape());
   output.info().shape(new_shape);
@@ -56,14 +107,6 @@ void StaticShapeInferer::handleSimpleUnaryOp(const ir::Operation &op,
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape = input.info().shape();
   output.info().shape(new_shape);
@@ -99,17 +142,6 @@ void StaticShapeInferer::dump()
   }
 }

-void StaticShapeInferer::visit(const ir::operation::Abs &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Add &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS),
-                           op.getInputs().at(ir::operation::Add::Input::RHS));
-}
-
 void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
 {
   const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
@@ -118,15 +150,6 @@ void StaticShapeInferer::visit(const ir::operation::ArgMax &op)
   // get mutable output operand
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
-
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto rank = input.info().shape().rank();
   const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);

@@ -145,35 +168,22 @@ void StaticShapeInferer::visit(const ir::operation::BatchMatMul &op)
   const auto lhs = _operands.at(lhs_index);
   const auto rhs = _operands.at(rhs_index);
   auto &output = _operands.at(output_index);
-
-  if (lhs.info().isDynamic() || rhs.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   auto new_shape = shape_inference::inferBatchMatMulShape(lhs.shape(), rhs.shape(), op.param());
   output.info().shape(new_shape);
 }

-void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+void StaticShapeInferer::visit(const ir::operation::BinaryArithmetic &op)
 {
-  const auto input_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::INPUT)};
-  const auto &input = _operands.at(input_idx);
+  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS),
+                           op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS));
+}

+void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
+{
   // get mutable output operand
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  // if input is dynamic, output also becomes dynamic.
-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto shape_idx{op.getInputs().at(ir::operation::BroadcastTo::Input::SHAPE)};
   const auto &shape = _operands.at(shape_idx);
@@ -192,11 +202,6 @@ void StaticShapeInferer::visit(const ir::operation::BroadcastTo &op)
   output.info().shape(new_shape);
 }

-void StaticShapeInferer::visit(const ir::operation::Cast &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::Comparison &op)
 {
   handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0),
@@ -215,14 +220,6 @@ void StaticShapeInferer::visit(const ir::operation::Concat &op)
   {
     const auto input_idx{op.getInputs().at(i)};
     const auto &input = _operands.at(input_idx);
-
-    if (input.info().isDynamic())
-    {
-      output.info().setDynamic();
-      _return_has_dynamic_tensor = true;
-      return;
-    }
-
     input_shapes.emplace_back(input.shape());
   }

@@ -241,33 +238,26 @@ void StaticShapeInferer::visit(const ir::operation::Conv2D &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  if (input.info().isDynamic() || ker.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape =
       shape_inference::inferConv2DShape(input.info().shape(), ker.info().shape(), op.param());
   output.info().shape(new_shape);
 }

-void StaticShapeInferer::visit(const ir::operation::Cos &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseActivation &op)
 {
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT));
+  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::Input::INPUT));
 }

-void StaticShapeInferer::visit(const ir::operation::Div &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseBinary &op)
 {
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS),
-                           op.getInputs().at(ir::operation::Div::Input::RHS));
+  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS),
+                           op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS));
 }

-void StaticShapeInferer::visit(const ir::operation::Exp &op)
+void StaticShapeInferer::visit(const ir::operation::ElementwiseUnary &op)
 {
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT));
+  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT));
 }
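This is the shape-inference side of the same IR consolidation seen in OperationValidator above: many dedicated nodes collapse into a few parameterized ones whose inference bodies were identical anyway. The grouping below is a summary inferred from the deletions in this diff, not part of the patch itself:

    // Former dedicated nodes              ->  consolidated node
    // Abs, Cast, Cos, Sin, Exp, Log, Neg,
    // Round, RSQRT, LogicalNot, ...       ->  ElementwiseUnary (op_type selects the function)
    // Logistic (and similar activations)  ->  ElementwiseActivation
    // Min, Max, LogicalOr                 ->  ElementwiseBinary
    // Add, Div, Mul (arithmetic)          ->  BinaryArithmetic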
 void StaticShapeInferer::visit(const ir::operation::ExpandDims &op)
@@ -279,13 +269,6 @@
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   if (!axis.isConstant())
   {
     output.info().setDynamic();
@@ -310,13 +293,6 @@ void StaticShapeInferer::visit(const ir::operation::Fill &op)
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  if (input.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   if (!input.isConstant())
   {
     output.info().setDynamic();
@@ -345,15 +321,6 @@ void StaticShapeInferer::visit(const ir::operation::FullyConnected &op)
   // get mutable output operand
   const auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);
-
-  // if input or ker is dynamic, output also becomes dynamic
-  if (input.info().isDynamic() || ker.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   // re-sizing output shape
   ir::Shape new_shape =
       shape_inference::inferFullyConnectedShape(input.info().shape(), ker.info().shape());
@@ -376,15 +343,6 @@ void StaticShapeInferer::visit(const ir::operation::Gather &op)

   const auto indices_idx{op.getInputs().at(ir::operation::Gather::Input::INDICES)};
   const auto &indices = _operands.at(indices_idx);
-
-  // if input is dynamic, output also becomes dynamic
-  if (input.info().isDynamic() || indices.info().isDynamic())
-  {
-    output.info().setDynamic();
-    _return_has_dynamic_tensor = true;
-    return;
-  }
-
   const auto rank = input.info().shape().rank();
   const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis);

@@ -476,27 +434,6 @@ void StaticShapeInferer::visit(const ir::operation::If &op)
   }
 }

-void StaticShapeInferer::visit(const ir::operation::Log &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalNot &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT));
-}
-
-void StaticShapeInferer::visit(const ir::operation::LogicalOr &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0),
-                           op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Logistic &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::L2Normalization &op)
 {
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::Input::INPUT));
@@ -507,29 +444,6 @@ void StaticShapeInferer::visit(const ir::operation::MatrixBandPart &op)
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::Input::INPUT));
 }

-void StaticShapeInferer::visit(const ir::operation::Max &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS),
-                           op.getInputs().at(ir::operation::Max::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Min &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS),
-                           op.getInputs().at(ir::operation::Min::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Mul &op)
-{
-  handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS),
-                           op.getInputs().at(ir::operation::Mul::Input::RHS));
-}
-
-void StaticShapeInferer::visit(const ir::operation::Neg &op)
-{
-  handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT));
-}
-
 void StaticShapeInferer::visit(const ir::operation::OneHot &op)
 {
   const auto indice_idx{op.getInputs().at(ir::operation::OneHot::Input::INDICES)};
@@ -542,7 +456,7 @@
   auto output_idx = op.getOutputs().at(0);
   ir::Operand &output = _operands.at(output_idx);

-  if (indice.info().isDynamic() || depth.info().isDynamic() || !depth.isConstant())
+  if (!depth.isConstant())
   {
     output.info().setDynamic();
     _return_has_dynamic_tensor = true;
@@ -558,18 +472,6 @@

 void StaticShapeInferer::visit(const ir::operation::Pack &op)
 {
-  bool is_any_of_inputs_dynamic = [&]() -> bool {
-    for
(uint32_t i = 0; i < op.getInputs().size(); ++i) - { - const auto &input = _operands.at(op.getInputs().at(i)); - if (input.info().isDynamic()) - { - return true; - } - } - return false; - }(); - const auto input_idx{op.getInputs().at(0)}; const auto &input = _operands.at(input_idx); @@ -577,14 +479,6 @@ void StaticShapeInferer::visit(const ir::operation::Pack &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic, output also becomes dynamic - if (is_any_of_inputs_dynamic) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - const auto rank = input.shape().rank() + 1; const auto axis = ((op.param().axis < 0) ? rank + op.param().axis : op.param().axis); const auto num = op.param().num; @@ -608,14 +502,6 @@ void StaticShapeInferer::visit(const ir::operation::Pad &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic or pad is dynamic, output also becomes dynamic - if (input.info().isDynamic() || pad.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // if pad is not constant, output also becomes dynamic if (!pad.isConstant()) { @@ -638,13 +524,6 @@ void StaticShapeInferer::visit(const ir::operation::Permute &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // re-sizing output shape // Permute is a special operation that layouts of input/output may be different on backend // However, it is not applied here, so input/output have the same layout of frontend. Because @@ -672,13 +551,6 @@ void StaticShapeInferer::visit(const ir::operation::Range &op) // get mutable output operand const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if any input is dynamic, output also becomes dynamic - if (start_op.info().isDynamic() || limit_op.info().isDynamic() || delta_op.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } ir::Shape new_shape; if (start_op.isConstant() && limit_op.isConstant() && delta_op.isConstant()) @@ -716,14 +588,6 @@ void StaticShapeInferer::visit(const ir::operation::Reduce &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - std::vector<int32_t> axes_vec; for (size_t i = 0; i < axes.shape().num_elements(); ++i) { @@ -761,14 +625,6 @@ void StaticShapeInferer::visit(const ir::operation::Reshape &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // New shape is given by second input tensor if (op.getInputs().size() == 2) { @@ -827,14 +683,6 @@ void StaticShapeInferer::visit(const ir::operation::ResizeBilinear &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - 
return; - } - // Shape inferencing logic based on Params ir::Shape new_shape = shape_inference::inferResizeBilinearShape( input.shape(), op.param().height_out, op.param().width_out); @@ -852,16 +700,6 @@ void StaticShapeInferer::visit(const ir::operation::Reverse &op) handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::Input::INPUT)); } -void StaticShapeInferer::visit(const ir::operation::Round &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT)); -} - -void StaticShapeInferer::visit(const ir::operation::RSQRT &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::Input::INPUT)); -} - void StaticShapeInferer::visit(const ir::operation::Select &op) { const auto input_cond_idx{op.getInputs().at(ir::operation::Select::Input::CONDITION)}; @@ -876,14 +714,6 @@ void StaticShapeInferer::visit(const ir::operation::Select &op) auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - if (input_cond.info().isDynamic() || input_true.info().isDynamic() || - input_false.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // Select output shape ir::Shape new_shape = shape_inference::inferSelectShape( input_cond.info().shape(), input_true.info().shape(), input_false.info().shape()); @@ -899,14 +729,6 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // re-sizing output shape ir::Shape output_shape; output_shape.append(input.info().shape().rank()); @@ -914,11 +736,6 @@ void StaticShapeInferer::visit(const ir::operation::Shape &op) output.info().shape(output_shape); } -void StaticShapeInferer::visit(const ir::operation::Sin &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT)); -} - void StaticShapeInferer::visit(const ir::operation::Slice &op) { const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)}; @@ -930,13 +747,6 @@ void StaticShapeInferer::visit(const ir::operation::Slice &op) const auto output_index = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_index); - if (input.info().isDynamic() || begins.info().isDynamic() || sizes.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // Whether input is constant or not does not affect whether output is dynamic or not if (!(begins.isConstant() && sizes.isConstant())) { @@ -970,13 +780,6 @@ void StaticShapeInferer::visit(const ir::operation::SpaceToBatchND &op) const auto &block_shape = _operands.at(block_shape_idx); const auto &padding = _operands.at(padding_idx); - if (input.info().isDynamic() || block_shape.info().isDynamic() || padding.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - // Whether input is constant or not does not affect whether output is dynamic or not if (!(block_shape.isConstant() && padding.isConstant())) { @@ -1006,18 +809,6 @@ void StaticShapeInferer::visit(const ir::operation::Split &op) const auto axis = op.param().axis; const auto num_splits = op.param().num_splits; - if (input.info().isDynamic()) - { - for (int out_tensor_idx = 0; out_tensor_idx < num_splits; out_tensor_idx++) - { - const auto output_idx =
op.getOutputs().at(out_tensor_idx); - ir::Operand &output = _operands.at(output_idx); - output.info().setDynamic(); - } - _return_has_dynamic_tensor = true; - return; - } - const auto rank = input.info().shape().rank(); auto axis_resolved = axis < 0 ? axis + rank : axis; @@ -1072,14 +863,6 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op) const auto output_index = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_index); - if (input.info().isDynamic() || starts.info().isDynamic() || ends.info().isDynamic() || - strides.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - if (!(starts.isConstant() && ends.isConstant() && strides.isConstant())) { output.info().setDynamic(); @@ -1104,17 +887,6 @@ void StaticShapeInferer::visit(const ir::operation::StridedSlice &op) output.info().shape(new_shape); } -void StaticShapeInferer::visit(const ir::operation::Sub &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS), - op.getInputs().at(ir::operation::Sub::Input::RHS)); -} - -void StaticShapeInferer::visit(const ir::operation::Tanh &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::Input::INPUT)); -} - void StaticShapeInferer::visit(const ir::operation::Tile &op) { const auto input_idx{op.getInputs().at(ir::operation::Tile::Input::INPUT)}; @@ -1126,13 +898,6 @@ void StaticShapeInferer::visit(const ir::operation::Tile &op) const auto output_idx = op.getOutputs().at(0); ir::Operand &output = _operands.at(output_idx); - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } - if (!multiplier.isConstant()) { output.info().setDynamic(); @@ -1158,13 +923,7 @@ void StaticShapeInferer::visit(const ir::operation::Transpose &op) ir::Operand &output = _operands.at(output_idx); const auto perm{op.param().perm}; // const auto rank{op.param().rank}; - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - output.info().setDynamic(); - _return_has_dynamic_tensor = true; - return; - } + // set output shape, based on input and params ir::Shape new_shape = shape_inference::inferTransposeShape(input.info().shape(), perm); output.info().shape(new_shape); @@ -1175,20 +934,6 @@ void StaticShapeInferer::visit(const ir::operation::Unpack &op) const auto input_idx{op.getInputs().at(0)}; const auto &input = _operands.at(input_idx); const auto num = op.param().num; - - // if input is dynamic, output also becomes dynamic - if (input.info().isDynamic()) - { - for (int out_tensor_idx = 0; out_tensor_idx < num; out_tensor_idx++) - { - const auto output_idx = op.getOutputs().at(out_tensor_idx); - ir::Operand &output = _operands.at(output_idx); - output.info().setDynamic(); - } - _return_has_dynamic_tensor = true; - return; - } - const auto rank = input.shape().rank(); const auto axis = ((op.param().axis < 0) ? 
rank + op.param().axis : op.param().axis); @@ -1346,11 +1091,6 @@ void StaticShapeInferer::visit(const ir::operation::While &op) } } -void StaticShapeInferer::visit(const ir::operation::ZerosLike &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::Input::INPUT)); -} - } // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/TensorBuilders.h b/runtime/onert/core/src/compiler/TensorBuilders.h index c0a1ebc04..3b0360b4b 100644 --- a/runtime/onert/core/src/compiler/TensorBuilders.h +++ b/runtime/onert/core/src/compiler/TensorBuilders.h @@ -67,17 +67,6 @@ public: return _cf_tensor_builder; } - std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) - { - for (auto &tensor_builder : _tensor_builders) - { - auto tensor = tensor_builder->tensorAt(ind); - if (tensor) - return tensor; - } - return nullptr; - } - private: std::unordered_set<std::shared_ptr<backend::ITensorBuilder>> _tensor_builders; std::shared_ptr<backend::controlflow::TensorBuilder> _cf_tensor_builder; diff --git a/runtime/onert/core/src/compiler/TensorRegistries.h b/runtime/onert/core/src/compiler/TensorRegistries.h new file mode 100644 index 000000000..8be87b081 --- /dev/null +++ b/runtime/onert/core/src/compiler/TensorRegistries.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ONERT_COMPILER_TENSOR_REGISTRIES_H__ +#define __ONERT_COMPILER_TENSOR_REGISTRIES_H__ + +#include <unordered_set> +#include <memory> +#include "backend/BackendContext.h" +#include "backend/Backend.h" +#include "backend/controlflow/Config.h" +#include "backend/controlflow/TensorBuilder.h" +#include "backend/controlflow/TensorRegistry.h" + +namespace onert +{ +namespace compiler +{ + +class TensorRegistries +{ +public: + TensorRegistries() = default; + + TensorRegistries(const onert::backend::BackendContexts &backend_contexts, + bool include_controlflow) + { + for (const auto &e : backend_contexts) + { + auto tensor_reg = e.second->tensor_registry; + if (e.first->config()->id() == backend::controlflow::Config::ID) + { + _cf_tensor_reg = + std::dynamic_pointer_cast<backend::controlflow::TensorRegistry>(tensor_reg); + if (include_controlflow) + _tensor_regs.insert(tensor_reg); + } + else + { + _tensor_regs.insert(tensor_reg); + } + } + } + + std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator begin() const + { + return _tensor_regs.cbegin(); + } + std::unordered_set<std::shared_ptr<onert::backend::ITensorRegistry>>::const_iterator end() const + { + return _tensor_regs.cend(); + } + + std::shared_ptr<backend::controlflow::TensorRegistry> getControlflowTensorRegistry() const + { + return _cf_tensor_reg; + } + + std::shared_ptr<backend::ITensor> getITensor(ir::OperandIndex ind) const + { + for (auto &tensor_reg : _tensor_regs) + { + auto tensor = tensor_reg->getITensor(ind); + if (tensor) + return tensor; + } + return nullptr; + } + +private: + std::unordered_set<std::shared_ptr<backend::ITensorRegistry>> _tensor_regs; + std::shared_ptr<backend::controlflow::TensorRegistry> _cf_tensor_reg; +}; + +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_TENSOR_REGISTRIES_H__
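The getITensor method above is a first-hit scan: ask each backend's registry in turn and return the first non-null tensor, or nullptr when no backend registered the index. A self-contained toy analogue of that lookup, with std::function standing in for ITensorRegistry and shared ints standing in for tensors; the types here are illustrative only, not onert's:

#include <cassert>
#include <functional>
#include <memory>
#include <vector>

// Each "registry" maps an operand index to a tensor handle; the aggregate
// returns the first non-null hit. In the real class the container is an
// unordered set, so presumably at most one backend registers a given index.
using Lookup = std::function<std::shared_ptr<int>(int)>;

std::shared_ptr<int> firstHit(const std::vector<Lookup> &registries, int index)
{
  for (const auto &reg : registries)
    if (auto tensor = reg(index))
      return tensor;
  return nullptr; // no backend knows this operand
}

int main()
{
  auto t = std::make_shared<int>(42);
  std::vector<Lookup> regs{
      [](int) { return std::shared_ptr<int>{}; },   // backend A: registered nothing
      [&](int i) { return i == 7 ? t : nullptr; }}; // backend B: owns operand 7
  assert(firstHit(regs, 7) == t);
  assert(firstHit(regs, 8) == nullptr);
}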
diff --git a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc index 1742a0dd5..647669e46 100644 --- a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.cc @@ -22,20 +22,20 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { -void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation &node) +void ConstantInsertionPass::callback(const ir::OperationIndex &node_index, ir::Operation &node) { const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index); const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index); const auto backend = op_seq_lower_info->backend(); const auto layout = op_seq_lower_info->layout(); - const auto factor = operand::PermuteFactor{backend, layout}; + const auto factor = ir::operand::PermuteFactor{backend, layout}; - for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { auto &object = _graph.operands().at(input); @@ -47,7 +47,7 @@ void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation auto new_object = object; new_object.unsetDef(); // TODO Remove const_cast - const_cast<OperationIndexSet &>(new_object.getUses()).clear(); + const_cast<ir::OperationIndexSet &>(new_object.getUses()).clear(); const auto new_index = _graph.operands().emplace(new_object); _replace_operands_map[key] = new_index; } @@ -89,5 +89,5 @@ void ConstantInsertionPass::callback(const OperationIndex &node_index, Operation } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h index 3ea4dc397..052883c92 100644 --- a/runtime/onert/core/src/ir/pass/ConstantInsertionPass.h +++ b/runtime/onert/core/src/compiler/pass/ConstantInsertionPass.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__ -#define __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__ +#define __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__ #include <ir/operand/PermuteFactor.h> #include <ir/Index.h> @@ -25,7 +25,7 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -39,13 +39,13 @@ public: std::string id() final { return "ConstantInsertionPass"; } public: - void callback(const OperationIndex &index, Operation &node) final; + void callback(const ir::OperationIndex &index, ir::Operation &node) final; private: struct ReplaceKey { - OperandIndex index; - operand::PermuteFactor factor; + ir::OperandIndex index; + ir::operand::PermuteFactor factor; bool operator==(const ReplaceKey &other) const { @@ -61,15 +61,16 @@ private: std::size_t operator()(const ReplaceKey &key) const noexcept { using std::hash; - return hash<OperandIndex>()(key.index) ^ (hash<operand::PermuteFactor>()(key.factor) << 1); + return hash<ir::OperandIndex>()(key.index) ^ + (hash<ir::operand::PermuteFactor>()(key.factor) << 1); } }; - std::unordered_map<ReplaceKey, OperandIndex, KeyHasher> _replace_operands_map; + std::unordered_map<ReplaceKey, ir::OperandIndex, KeyHasher> _replace_operands_map; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_CONSTANT_INSERTION_PASS_H__ +#endif // __ONERT_COMPILER_PASS_CONSTANT_INSERTION_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc index 04f4e59c0..1c1dbe0ee 100644 --- a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.cc +++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.cc @@ -23,28 +23,28 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { -void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation &node) +void ConstantLoweringPass::callback(const ir::OperationIndex &node_index, ir::Operation &node) { const auto &op_sequence_index = _lowered_graph.op_seqs().getOperation(node_index); const auto op_seq_lower_info = _lowered_graph.getLowerInfo(op_sequence_index); const auto backend = op_seq_lower_info->backend(); const auto layout = op_seq_lower_info->layout(); - const auto factor = operand::PermuteFactor{backend, layout}; + const auto factor = ir::operand::PermuteFactor{backend, layout}; // Now this runtime does not support the node making output of operation as constant - for (const auto input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto input : node.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED) { auto &object = _graph.operands().at(input); if (object.isConstant()) { // All constant operands are already assigned at each backend by ConstantInsertionPass.
So a // constant has `def` and `use` as the same PermuteFactor - _lowered_graph.setLowerInfo(input, std::make_unique<operand::LowerInfo>()); + _lowered_graph.setLowerInfo(input, std::make_unique<ir::operand::LowerInfo>()); _lowered_graph.getLowerInfo(input)->addDefPermuteFactor(factor); _lowered_graph.getLowerInfo(input)->addUsePermuteFactor(factor); } @@ -52,5 +52,5 @@ void ConstantLoweringPass::callback(const OperationIndex &node_index, Operation } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.h b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h index 5c9f4352b..e17d776d1 100644 --- a/runtime/onert/core/src/ir/pass/ConstantLoweringPass.h +++ b/runtime/onert/core/src/compiler/pass/ConstantLoweringPass.h @@ -14,15 +14,15 @@ * limitations under the License. */ -#ifndef __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__ -#define __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__ +#define __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__ #include <ir/Index.h> #include "LoweredOperationPass.h" namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -36,11 +36,11 @@ public: std::string id() final { return "ConstantLoweringPass"; } public: - void callback(const OperationIndex &index, Operation &node) final; + void callback(const ir::OperationIndex &index, ir::Operation &node) final; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_CONSTANT_LOWERING_PASS_H__ +#endif // __ONERT_COMPILER_PASS_CONSTANT_LOWERING_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/LoweredOperandPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h index eefb8ddfb..0c5f7d745 100644 --- a/runtime/onert/core/src/ir/pass/LoweredOperandPass.h +++ b/runtime/onert/core/src/compiler/pass/LoweredOperandPass.h @@ -18,11 +18,11 @@ #define __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__ #include "OperandPass.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -30,7 +30,7 @@ namespace pass class LoweredOperandPass : public OperandPass { public: - LoweredOperandPass(ir::LoweredGraph &lowered_graph) + LoweredOperandPass(compiler::LoweredGraph &lowered_graph) : OperandPass{lowered_graph.graph()}, _lowered_graph{lowered_graph} { // DO NOTHING @@ -39,14 +39,14 @@ public: virtual ~LoweredOperandPass() = default; std::string id() override = 0; - void callback(const OperandIndex &i, Operand &o) override = 0; + void callback(const ir::OperandIndex &i, ir::Operand &o) override = 0; protected: - ir::LoweredGraph &_lowered_graph; + compiler::LoweredGraph &_lowered_graph; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert #endif // __ONERT_IR_PASS_LOWERED_OPERAND_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/LoweredOperationPass.h b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h index 0138712d7..5c8569be2 100644 --- a/runtime/onert/core/src/ir/pass/LoweredOperationPass.h +++ b/runtime/onert/core/src/compiler/pass/LoweredOperationPass.h @@ -18,11 +18,11 @@ #define __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__ #include "OperationPass.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -30,7 +30,7 @@ namespace pass class LoweredOperationPass : public OperationPass 
{ public: - LoweredOperationPass(ir::LoweredGraph &lowered_graph) + LoweredOperationPass(LoweredGraph &lowered_graph) : OperationPass{lowered_graph.graph()}, _lowered_graph{lowered_graph} { // DO NOTHING @@ -39,14 +39,14 @@ public: virtual ~LoweredOperationPass() = default; std::string id() override = 0; - void callback(const OperationIndex &i, Operation &o) override = 0; + void callback(const ir::OperationIndex &i, ir::Operation &o) override = 0; protected: - ir::LoweredGraph &_lowered_graph; + LoweredGraph &_lowered_graph; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert #endif // __ONERT_IR_PASS_LOWERED_OPERATION_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/OperandPass.cc b/runtime/onert/core/src/compiler/pass/OperandPass.cc index 693a0f493..50c001c30 100644 --- a/runtime/onert/core/src/ir/pass/OperandPass.cc +++ b/runtime/onert/core/src/compiler/pass/OperandPass.cc @@ -20,7 +20,7 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -28,9 +28,9 @@ namespace pass void OperandPass::run() { _graph.operands().iterate( - [&](const OperandIndex &index, Operand &object) { callback(index, object); }); + [&](const ir::OperandIndex &index, ir::Operand &object) { callback(index, object); }); } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/OperandPass.h b/runtime/onert/core/src/compiler/pass/OperandPass.h index 393060741..b094879c5 100644 --- a/runtime/onert/core/src/ir/pass/OperandPass.h +++ b/runtime/onert/core/src/compiler/pass/OperandPass.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_GRAPH_PASS_OPERAND_PASS_H__ -#define __ONERT_GRAPH_PASS_OPERAND_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_OPERAND_PASS_H__ +#define __ONERT_COMPILER_PASS_OPERAND_PASS_H__ #include "Pass.h" #include "ir/Index.h" @@ -30,7 +30,7 @@ class Operand; namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -44,11 +44,11 @@ public: public: std::string id() override = 0; void run() override final; - virtual void callback(const OperandIndex &i, Operand &o) = 0; + virtual void callback(const ir::OperandIndex &i, ir::Operand &o) = 0; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_OPERAND_PASS_H__ +#endif // __ONERT_COMPILER_PASS_OPERAND_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/OperationPass.cc b/runtime/onert/core/src/compiler/pass/OperationPass.cc index 84b1da3ee..d7a55cb22 100644 --- a/runtime/onert/core/src/ir/pass/OperationPass.cc +++ b/runtime/onert/core/src/compiler/pass/OperationPass.cc @@ -22,7 +22,7 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -30,9 +30,9 @@ namespace pass void OperationPass::run() { _graph.operations().iterate( - [&](const OperationIndex &index, Operation &node) { callback(index, node); }); + [&](const ir::OperationIndex &index, ir::Operation &node) { callback(index, node); }); } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/OperationPass.h b/runtime/onert/core/src/compiler/pass/OperationPass.h index 1733f87ed..ac4d818a2 100644 --- a/runtime/onert/core/src/ir/pass/OperationPass.h +++ b/runtime/onert/core/src/compiler/pass/OperationPass.h @@ -19,8 +19,8 @@ * @brief This file contains OperationPass class */ -#ifndef __ONERT_GRAPH_PASS_OPERATION_PASS_H__ -#define __ONERT_GRAPH_PASS_OPERATION_PASS_H__ +#ifndef 
__ONERT_COMPILER_PASS_OPERATION_PASS_H__ +#define __ONERT_COMPILER_PASS_OPERATION_PASS_H__ #include "Pass.h" #include "ir/Index.h" @@ -35,7 +35,7 @@ class Operation; namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -62,7 +62,7 @@ public: * @param index is the index of a node in graph * @param node is the node in graph */ - virtual void callback(const OperationIndex &index, Operation &node) = 0; + virtual void callback(const ir::OperationIndex &index, ir::Operation &node) = 0; /** * @brief Run the pass @@ -71,7 +71,7 @@ public: }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_OPERATION_PASS_H__ +#endif // __ONERT_COMPILER_PASS_OPERATION_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/Pass.h b/runtime/onert/core/src/compiler/pass/Pass.h index 1c6628f6f..3f356c337 100644 --- a/runtime/onert/core/src/ir/pass/Pass.h +++ b/runtime/onert/core/src/compiler/pass/Pass.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_GRAPH_PASS_PASS_H__ -#define __ONERT_GRAPH_PASS_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_PASS_H__ +#define __ONERT_COMPILER_PASS_PASS_H__ #include <string> @@ -24,12 +24,12 @@ namespace onert namespace ir { class Graph; -} // namespace ir +} // namespace compiler } // namespace onert namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -37,7 +37,7 @@ namespace pass class Pass { public: - Pass(Graph &graph) : _graph{graph} {} + Pass(ir::Graph &graph) : _graph{graph} {} virtual ~Pass() = default; public: @@ -45,11 +45,11 @@ public: virtual void run() = 0; protected: - Graph &_graph; + ir::Graph &_graph; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_PASS_H__ +#endif // __ONERT_COMPILER_PASS_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc index 2deccd40b..f01697034 100644 --- a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.cc @@ -21,35 +21,33 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { -void PermutationEliminationPass::callback(const OperationIndex &ind, Operation &node) +void PermutationEliminationPass::callback(const ir::OperationIndex &ind, ir::Operation &node) { _op_ind = ind; node.accept(*this); }; -void PermutationEliminationPass::visit(const operation::Permute &node) +void PermutationEliminationPass::visit(const ir::operation::Permute &node) { auto in_operand = node.getInputs().at(0); auto out_operand = node.getOutputs().at(0); - // Check if two tensors are both portable - // TODO Make this general, this is just a workaround to check two tensors are portable + // Check if two tensors are both portable; if not, we can't eliminate the node { auto in_def_factor = _lowered_graph.getLowerInfo(in_operand)->def_factors().getOnlyElement(); auto out_def_factor = _lowered_graph.getLowerInfo(out_operand)->def_factors().getOnlyElement(); - auto in_backend_id = in_def_factor.backend()->config()->id(); - auto out_backend_id = out_def_factor.backend()->config()->id(); + auto in_config = in_def_factor.backend()->config(); + auto out_config = out_def_factor.backend()->config(); - // TODO Fix this workaround that removes only Permute between cpu and controlflow backend. - // This should be general.
- if (!((in_backend_id == backend::controlflow::Config::ID && out_backend_id == "cpu") || - (in_backend_id == "cpu" && out_backend_id == backend::controlflow::Config::ID))) + // FIXME Supporting dynamic tensor does not exactly mean those are portable. + // It may need to have another config option for checking if each uses `IPortableTensor`. + if (!(in_config->supportDynamicTensor() && out_config->supportDynamicTensor())) return; } @@ -65,7 +63,7 @@ void PermutationEliminationPass::visit(const operation::Permute &node) if (!op_seq.getOutputs().contains(in_operand)) return; - // Update OpSequence/Operation edges and Operand edges + // Update OpSequence/ir::Operation edges and ir::Operand edges op_seq.replaceOutputs(in_operand, out_operand); for (auto op : op_seq.operations()) { @@ -106,8 +104,8 @@ void PermutationEliminationPass::visit(const operation::Permute &node) }); VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl; - VERBOSE(removePermute) << " - Input (removed) Operand : " << in_operand << std::endl; - VERBOSE(removePermute) << " - Output(kept) Operand : " << out_operand << std::endl; + VERBOSE(removePermute) << " - Input (removed) ir::Operand : " << in_operand << std::endl; + VERBOSE(removePermute) << " - Output(kept) ir::Operand : " << out_operand << std::endl; } else { @@ -145,11 +143,11 @@ void PermutationEliminationPass::visit(const operation::Permute &node) } VERBOSE(removePermute) << "Permute Op removed, node index : " << _op_ind << std::endl; - VERBOSE(removePermute) << " - Input (kept) Operand : " << in_operand << std::endl; - VERBOSE(removePermute) << " - Output(removed) Operand : " << out_operand << std::endl; + VERBOSE(removePermute) << " - Input (kept) ir::Operand : " << in_operand << std::endl; + VERBOSE(removePermute) << " - Output(removed) ir::Operand : " << out_operand << std::endl; } } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h index 614e44cd2..29daf1a82 100644 --- a/runtime/onert/core/src/ir/pass/PermutationEliminationPass.h +++ b/runtime/onert/core/src/compiler/pass/PermutationEliminationPass.h @@ -14,15 +14,15 @@ * limitations under the License. */ -#ifndef __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__ -#define __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__ +#define __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__ #include "ir/OperationVisitor.h" #include "LoweredOperationPass.h" namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -40,7 +40,7 @@ namespace pass * @note This is an optimization pass which means that everything should work fine even if this pass * was skipped. 
*/ -class PermutationEliminationPass : public LoweredOperationPass, public OperationVisitor +class PermutationEliminationPass : public LoweredOperationPass, public ir::OperationVisitor { public: using LoweredOperationPass::LoweredOperationPass; @@ -49,17 +49,17 @@ public: std::string id() final { return "PermutationEliminationPass"; } public: - void callback(const OperationIndex &i, Operation &n) final; + void callback(const ir::OperationIndex &i, ir::Operation &n) final; private: - void visit(const operation::Permute &) final; + void visit(const ir::operation::Permute &) final; private: ir::OperationIndex _op_ind; }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_PERMUTATION_ELIMINATION_PASS_H__ +#endif // __ONERT_COMPILER_PASS_PERMUTATION_ELIMINATION_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc index 3578af813..c83a72ada 100644 --- a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.cc @@ -31,12 +31,12 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { -void PermutationInsertionPass::callback(const OperandIndex &index, Operand &object) +void PermutationInsertionPass::callback(const ir::OperandIndex &index, ir::Operand &object) { auto &&operand_li = _lowered_graph.getLowerInfo(index); assert(operand_li); @@ -48,10 +48,10 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje return; } - std::list<OperationIndex> permute_indexes; + std::list<ir::OperationIndex> permute_indexes; // Build a map for all necessary type of operands - std::unordered_map<operand::PermuteFactor, OperandIndex> factor_to_index; + std::unordered_map<ir::operand::PermuteFactor, ir::OperandIndex> factor_to_index; { assert(operand_li->def_factors().size() == 1); for (auto factor : operand_li->def_factors()) @@ -72,7 +72,7 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje // Update operations' input that uses this operand { - std::list<OperationIndex> remove_list; + std::list<ir::OperationIndex> remove_list; auto uses = object.getUses(); for (auto use : uses) @@ -121,8 +121,8 @@ void PermutationInsertionPass::callback(const OperandIndex &index, Operand &obje } } -OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &operand_index, - const operand::PermuteFactor &factor) +ir::OperationIndex PermutationInsertionPass::insertPermute(const ir::OperandIndex &operand_index, + const ir::operand::PermuteFactor &factor) { assert(!_graph.isBuildingPhase()); @@ -143,14 +143,14 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera auto output_backend = factor.backend(); // NOTE Permute may not have specific layout because the layout of input and output may be // different. 
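The NOTE above is why insertPermute leaves the Permute node's own layout as UNKNOWN; the permute kind itself, chosen a little further down from the input and output def-factor layouts, reduces to a three-way dispatch. A self-contained sketch of that dispatch follows; the COPY fallback mirrors what the elided else branch presumably returns, so treat it as an assumption:

#include <cassert>

enum class Layout { NHWC, NCHW, UNKNOWN };
enum class PermuteType { NHWC_TO_NCHW, NCHW_TO_NHWC, COPY };

// Only the two 4D layout swaps need a real permutation; anything else
// (same layout on both sides, or an unknown layout) degenerates to a copy.
PermuteType permuteTypeFor(Layout in, Layout out)
{
  if (in == Layout::NHWC && out == Layout::NCHW)
    return PermuteType::NHWC_TO_NCHW;
  if (in == Layout::NCHW && out == Layout::NHWC)
    return PermuteType::NCHW_TO_NHWC;
  return PermuteType::COPY; // assumed behavior of the branch not shown in this hunk
}

int main()
{
  assert(permuteTypeFor(Layout::NHWC, Layout::NCHW) == PermuteType::NHWC_TO_NCHW);
  assert(permuteTypeFor(Layout::NHWC, Layout::NHWC) == PermuteType::COPY);
}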
- const auto permute_node_layout = Layout::UNKNOWN; + const auto permute_node_layout = ir::Layout::UNKNOWN; // NOTE If one backend supports several layout, the backend must support Permute operation const backend::Backend *permute_node_backend = compiler::BackendManager::get().getControlflow(); if (input_backend == output_backend) { permute_node_backend = input_backend; } - const operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout}; + const ir::operand::PermuteFactor permute_node_factor{permute_node_backend, permute_node_layout}; // Update LowerInfo of input operand auto operand_lower_info = _lowered_graph.getLowerInfo(operand_index); @@ -158,7 +158,7 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera operand_lower_info->addUsePermuteFactor(permute_node_factor); // Update LowerInfo of output operand - auto out_operand_li = std::make_unique<operand::LowerInfo>(); + auto out_operand_li = std::make_unique<ir::operand::LowerInfo>(); // The input and output factors of all nodes will be the same except Permute. So Tensor's // allocators allocates memory using only the information of def permutation factor now. @@ -170,13 +170,13 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera // Insert permute operation to the graph const auto input_layout = input_factor.layout(); const auto output_layout = factor.layout(); - using Permute = operation::Permute; + using Permute = ir::operation::Permute; const auto permute_type = [&]() { - if (input_layout == Layout::NHWC && output_layout == Layout::NCHW) + if (input_layout == ir::Layout::NHWC && output_layout == ir::Layout::NCHW) { return Permute::Type::NHWC_TO_NCHW; } - else if (input_layout == Layout::NCHW && output_layout == Layout::NHWC) + else if (input_layout == ir::Layout::NCHW && output_layout == ir::Layout::NHWC) { return Permute::Type::NCHW_TO_NHWC; } @@ -200,7 +200,7 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera auto &op_seq = _lowered_graph.op_seqs().at(op_seq_index); op_seq.setInputs(node.getInputs()); op_seq.setOutputs(node.getOutputs()); - _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<operation::LowerInfo>( + _lowered_graph.setLowerInfo(op_seq_index, std::make_unique<ir::operation::LowerInfo>( permute_node_backend, permute_node_layout)); } @@ -212,5 +212,5 @@ OperationIndex PermutationInsertionPass::insertPermute(const OperandIndex &opera return node_index; } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h index 6c30c6f12..758515385 100644 --- a/runtime/onert/core/src/ir/pass/PermutationInsertionPass.h +++ b/runtime/onert/core/src/compiler/pass/PermutationInsertionPass.h @@ -14,17 +14,17 @@ * limitations under the License. 
*/ -#ifndef __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__ -#define __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__ +#ifndef __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__ +#define __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__ #include "LoweredOperandPass.h" #include "compiler/BackendManager.h" -#include "ir/Operand.h" //for OperationIndex +#include "ir/Operand.h" #include "ir/operand/PermuteFactor.h" namespace onert { -namespace ir +namespace compiler { namespace pass { @@ -36,7 +36,7 @@ public: public: std::string id() override { return "PermutationInsertionPass"; } - void callback(const OperandIndex &index, Operand &object) override; + void callback(const ir::OperandIndex &index, ir::Operand &object) override; private: /** @@ -45,14 +45,14 @@ private: * @param operand_index is the target operand index for the insertion * @param factor is the output operand's backend type and layout * - * @return OperationIndex + * @return ir::OperationIndex */ - OperationIndex insertPermute(const OperandIndex &operand_index, - const operand::PermuteFactor &factor); + ir::OperationIndex insertPermute(const ir::OperandIndex &operand_index, + const ir::operand::PermuteFactor &factor); }; } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert -#endif // __ONERT_GRAPH_PASS_PERMUTATION_INSERTION_PASS_H__ +#endif // __ONERT_COMPILER_PASS_PERMUTATION_INSERTION_PASS_H__ diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc index 6eb412cf1..c5c95c726 100644 --- a/runtime/onert/core/src/ir/pass/PermutationOperationPass.cc +++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.cc @@ -23,11 +23,13 @@ namespace onert { -namespace ir +namespace compiler { namespace pass { +using namespace ir; + void PermutationOperationPass::callback(const OperationIndex &, Operation &node) { node.accept(*this); @@ -70,7 +72,7 @@ void PermutationOperationPass::applyExpandRanks(const Operation &node) "operand used in more than one node"); // TODO remove const_cast later. 
For example, _ctx may need to be a non const variable or // a node to extend shape may be inserted in front of this operation - const_cast<ir::Shape &>(operand.shape()).extendRank(expanded_rank); + const_cast<Shape &>(operand.shape()).extendRank(expanded_rank); } } } @@ -134,7 +136,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) const auto op_seq_li = _lowered_graph.getLowerInfo(op_seq_index); _lowered_graph.setLowerInfo( next_op_seq_index, - std::make_unique<operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout())); + std::make_unique<ir::operation::LowerInfo>(op_seq_li->backend(), op_seq_li->layout())); } } @@ -164,8 +166,8 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) auto &new_op_seq = _lowered_graph.op_seqs().at(new_op_seq_index); new_op_seq.setInputs(node.getInputs()); new_op_seq.setOutputs(node.getOutputs()); - _lowered_graph.setLowerInfo(new_op_seq_index, - std::make_unique<operation::LowerInfo>(backend, frontend_layout)); + _lowered_graph.setLowerInfo( + new_op_seq_index, std::make_unique<ir::operation::LowerInfo>(backend, frontend_layout)); } // Change PermuteFactors of operands of target node @@ -175,7 +177,7 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) const auto backend = op_seq_li->backend(); const operand::PermuteFactor removed_factor{backend, backend_layout}; const operand::PermuteFactor new_factor{backend, frontend_layout}; - for (const auto &input : node.getInputs() | Remove::DUPLICATED | ir::Remove::UNDEFINED) + for (const auto &input : node.getInputs() | Remove::DUPLICATED | Remove::UNDEFINED) { bool canRemove = true; for (const auto &use : _graph.operands().at(input).getUses()) @@ -227,17 +229,31 @@ void PermutationOperationPass::changeToKeepLayout(const Operation &node) } } -void PermutationOperationPass::visit(const operation::Add &node) { applyExpandRanks(node); } +void PermutationOperationPass::visit(const ir::operation::BinaryArithmetic &node) +{ + applyExpandRanks(node); +} -void PermutationOperationPass::visit(const operation::Concat &node) { applyExpandRanks(node); } +void PermutationOperationPass::visit(const ir::operation::Concat &node) { applyExpandRanks(node); } -void PermutationOperationPass::visit(const operation::Comparison &node) { applyExpandRanks(node); } +void PermutationOperationPass::visit(const ir::operation::Comparison &node) +{ + applyExpandRanks(node); +} -void PermutationOperationPass::visit(const operation::Div &node) { applyExpandRanks(node); } +void PermutationOperationPass::visit(const ir::operation::ElementwiseBinary &node) +{ + applyExpandRanks(node); +} -void PermutationOperationPass::visit(const operation::FullyConnected &node) +void PermutationOperationPass::visit(const ir::operation::ElementwiseUnary &node) { - const auto &input_ind = node.getInputs().at(operation::FullyConnected::Input::INPUT); + applyExpandRanks(node); +} + +void PermutationOperationPass::visit(const ir::operation::FullyConnected &node) +{ + const auto &input_ind = node.getInputs().at(ir::operation::FullyConnected::Input::INPUT); const auto &input_obj = _graph.operands().at(input_ind); const auto &input_shape = input_obj.shape(); @@ -247,9 +263,9 @@ void PermutationOperationPass::visit(const operation::FullyConnected &node) } } -void PermutationOperationPass::visit(const operation::Gather &node) +void PermutationOperationPass::visit(const ir::operation::Gather &node) { - const auto &input_ind = node.getInputs().at(operation::Gather::Input::INPUT); + const auto 
&input_ind = node.getInputs().at(ir::operation::Gather::Input::INPUT); const auto &input_obj = _graph.operands().at(input_ind); const auto &input_shape = input_obj.shape(); @@ -263,21 +279,9 @@ void PermutationOperationPass::visit(const operation::Gather &node) } } -void PermutationOperationPass::visit(const operation::LogicalAnd &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::LogicalNot &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::LogicalOr &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::Max &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::Min &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::Mul &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::Pack &node) +void PermutationOperationPass::visit(const ir::operation::Pack &node) { - const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT); + const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT); const auto &input_obj = _graph.operands().at(input_ind); const auto &input_shape = input_obj.shape(); @@ -291,11 +295,11 @@ void PermutationOperationPass::visit(const operation::Pack &node) } } -void PermutationOperationPass::visit(const operation::PReLU &node) { applyExpandRanks(node); } +void PermutationOperationPass::visit(const ir::operation::PReLU &node) { applyExpandRanks(node); } -void PermutationOperationPass::visit(const operation::Reshape &node) +void PermutationOperationPass::visit(const ir::operation::Reshape &node) { - const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT); + const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT); const auto &input_obj = _graph.operands().at(input_ind); const auto &input_shape = input_obj.shape(); @@ -309,16 +313,14 @@ void PermutationOperationPass::visit(const operation::Reshape &node) } } -void PermutationOperationPass::visit(const operation::SquaredDifference &node) +void PermutationOperationPass::visit(const ir::operation::SquaredDifference &node) { applyExpandRanks(node); } -void PermutationOperationPass::visit(const operation::Sub &node) { applyExpandRanks(node); } - -void PermutationOperationPass::visit(const operation::Unpack &node) +void PermutationOperationPass::visit(const ir::operation::Unpack &node) { - const auto &input_ind = node.getInputs().at(operation::Reshape::Input::INPUT); + const auto &input_ind = node.getInputs().at(ir::operation::Reshape::Input::INPUT); const auto &input_obj = _graph.operands().at(input_ind); const auto &input_shape = input_obj.shape(); @@ -333,5 +335,5 @@ void PermutationOperationPass::visit(const operation::Unpack &node) } } // namespace pass -} // namespace ir +} // namespace compiler } // namespace onert diff --git a/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h new file mode 100644 index 000000000..2dd76b971 --- /dev/null +++ b/runtime/onert/core/src/compiler/pass/PermutationOperationPass.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__ +#define __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__ + +#include "ir/OperationVisitor.h" +#include "LoweredOperationPass.h" + +namespace onert +{ +namespace compiler +{ +namespace pass +{ + +class PermutationOperationPass : public LoweredOperationPass, public ir::OperationVisitor +{ +public: + using LoweredOperationPass::LoweredOperationPass; + +public: + std::string id() final { return "PermutationOperationPass"; } + +public: + void callback(const ir::OperationIndex &i, ir::Operation &n) final; + +public: + void visit(const ir::operation::BinaryArithmetic &) final; + void visit(const ir::operation::Comparison &) final; + void visit(const ir::operation::Concat &) final; + void visit(const ir::operation::ElementwiseBinary &) final; + void visit(const ir::operation::ElementwiseUnary &) final; + void visit(const ir::operation::Pack &) final; + void visit(const ir::operation::PReLU &) final; + void visit(const ir::operation::SquaredDifference &) final; + void visit(const ir::operation::Unpack &) final; + void visit(const ir::operation::FullyConnected &) final; + void visit(const ir::operation::Gather &) final; + void visit(const ir::operation::Reshape &) final; + +private: + void applyExpandRanks(const ir::Operation &); + void changeToKeepLayout(const ir::Operation &); +}; + +} // namespace pass +} // namespace compiler +} // namespace onert + +#endif // __ONERT_COMPILER_PASS_PERMUTATION_OPERATION_PASS_H__ diff --git a/runtime/onert/core/src/dumper/dot/DotDumper.h b/runtime/onert/core/src/dumper/dot/DotDumper.h index 668785a81..fdbca1642 100644 --- a/runtime/onert/core/src/dumper/dot/DotDumper.h +++ b/runtime/onert/core/src/dumper/dot/DotDumper.h @@ -15,7 +15,7 @@ */ #include "ir/Graph.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" #ifndef __ONERT_DUMPER_DOT_DOT_DUMPER_H__ #define __ONERT_DUMPER_DOT_DOT_DUMPER_H__ @@ -42,7 +42,7 @@ public: : _lowered_graph{nullptr}, _graph(graph), _level{level} { } - DotDumper(const ir::LoweredGraph *lowered_graph, Level level) + DotDumper(const compiler::LoweredGraph *lowered_graph, Level level) : _lowered_graph{lowered_graph}, _graph(_lowered_graph->graph()), _level{level} { } @@ -57,7 +57,7 @@ public: void dump(const std::string &tag); private: - const ir::LoweredGraph *_lowered_graph; + const compiler::LoweredGraph *_lowered_graph; const ir::Graph &_graph; Level _level; }; diff --git a/runtime/onert/core/src/exec/DataflowExecutor.cc b/runtime/onert/core/src/exec/DataflowExecutor.cc index cb516b53a..a69ae9cdb 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.cc +++ b/runtime/onert/core/src/exec/DataflowExecutor.cc @@ -78,11 +78,13 @@ bool DataflowExecutor::noWaitingJobs() } DataflowExecutor::DataflowExecutor( - std::unique_ptr<ir::LoweredGraph> lowered_graph, + std::unique_ptr<compiler::LoweredGraph> lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map) - : 
ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders}, + const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs, + compiler::CodeMap &&code_map) + : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, + std::move(tensor_mgrs)}, _code_map{std::move(code_map)} { VERBOSE(DataflowExecutor) << "Constructing Dataflow Executor" << std::endl; diff --git a/runtime/onert/core/src/exec/DataflowExecutor.h b/runtime/onert/core/src/exec/DataflowExecutor.h index aebb03c23..8d60e3e4b 100644 --- a/runtime/onert/core/src/exec/DataflowExecutor.h +++ b/runtime/onert/core/src/exec/DataflowExecutor.h @@ -49,10 +49,11 @@ public: * @param tensor_builders Tensor builders that are currently used * @param code_map OpSequence and its code map */ - DataflowExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph, + DataflowExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map); + const compiler::TensorRegistries &tensor_regs, + backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map); void executeImpl() override; diff --git a/runtime/onert/core/src/exec/DynamicShapeInference.cc b/runtime/onert/core/src/exec/DynamicShapeInference.cc index 5ec7012ee..70bddfce4 100644 --- a/runtime/onert/core/src/exec/DynamicShapeInference.cc +++ b/runtime/onert/core/src/exec/DynamicShapeInference.cc @@ -100,17 +100,6 @@ void DynamicShapeInferer::handleSimpleUnaryOp(const ir::Operation &op, assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Abs &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Abs::INPUT)); -} - -void DynamicShapeInferer::visit(const ir::operation::Add &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Add::Input::LHS), - op.getInputs().at(ir::operation::Add::Input::RHS)); -} - void DynamicShapeInferer::visit(const ir::operation::ArgMax &op) { const auto input_idx{op.getInputs().at(ir::operation::ArgMax::Input::INPUT)}; @@ -155,6 +144,12 @@ void DynamicShapeInferer::visit(const ir::operation::BatchMatMul &op) dynamicTensorManagerOf(output)->applyShape(output_index, new_shape); } +void DynamicShapeInferer::visit(const ir::operation::BinaryArithmetic &op) +{ + handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::BinaryArithmetic::Input::LHS), + op.getInputs().at(ir::operation::BinaryArithmetic::Input::RHS)); +} + void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op) { auto output_ind = op.getOutputs().at(0); @@ -179,11 +174,6 @@ void DynamicShapeInferer::visit(const ir::operation::BroadcastTo &op) assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Cast &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cast::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::Comparison &op) { handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Comparison::Input::INPUT0), @@ -292,20 +282,20 @@ void DynamicShapeInferer::visit(const ir::operation::Conv2D &op) assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Cos &op) +void DynamicShapeInferer::visit(const ir::operation::ElementwiseActivation &op) { - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Cos::Input::INPUT)); + 
handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseActivation::INPUT)); } -void DynamicShapeInferer::visit(const ir::operation::Div &op) +void DynamicShapeInferer::visit(const ir::operation::ElementwiseBinary &op) { - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Div::Input::LHS), - op.getInputs().at(ir::operation::Div::Input::RHS)); + handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::ElementwiseBinary::Input::LHS), + op.getInputs().at(ir::operation::ElementwiseBinary::Input::RHS)); } -void DynamicShapeInferer::visit(const ir::operation::Exp &op) +void DynamicShapeInferer::visit(const ir::operation::ElementwiseUnary &op) { - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Exp::Input::INPUT)); + handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ElementwiseUnary::Input::INPUT)); } void DynamicShapeInferer::visit(const ir::operation::ExpandDims &op) @@ -430,27 +420,6 @@ void DynamicShapeInferer::visit(const ir::operation::Gather &op) assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Log &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Log::Input::INPUT)); -} - -void DynamicShapeInferer::visit(const ir::operation::LogicalNot &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::LogicalNot::Input::INPUT)); -} - -void DynamicShapeInferer::visit(const ir::operation::LogicalOr &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::LogicalOr::Input::INPUT0), - op.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)); -} - -void DynamicShapeInferer::visit(const ir::operation::Logistic &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Logistic::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::L2Normalization &op) { handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::L2Normalization::INPUT)); @@ -461,29 +430,6 @@ void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op) handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT)); } -void DynamicShapeInferer::visit(const ir::operation::Max &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Max::Input::LHS), - op.getInputs().at(ir::operation::Max::Input::RHS)); -} - -void DynamicShapeInferer::visit(const ir::operation::Min &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Min::Input::LHS), - op.getInputs().at(ir::operation::Min::Input::RHS)); -} - -void DynamicShapeInferer::visit(const ir::operation::Mul &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Mul::Input::LHS), - op.getInputs().at(ir::operation::Mul::Input::RHS)); -} - -void DynamicShapeInferer::visit(const ir::operation::Neg &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Neg::Input::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::OneHot &op) { auto output_ind = op.getOutputs().at(0); @@ -766,7 +712,7 @@ void DynamicShapeInferer::visit(const ir::operation::ResizeBilinear &op) if (output_shape != output->getShape() || output->buffer() == nullptr) { // change on output shape - _dynamic_tensor_manager->applyShape(output_ind, output_shape); + dynamicTensorManagerOf(output)->applyShape(output_ind, output_shape); } assert(output->buffer() != nullptr); } @@ -776,16 +722,6 @@ void DynamicShapeInferer::visit(const ir::operation::Reverse &op) handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Reverse::INPUT)); } -void DynamicShapeInferer::visit(const 
ir::operation::Round &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Round::Input::INPUT)); -} - -void DynamicShapeInferer::visit(const ir::operation::RSQRT &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::RSQRT::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::Select &op) { const auto input_cond_idx = op.getInputs().at(ir::operation::Select::Input::CONDITION); @@ -836,11 +772,6 @@ void DynamicShapeInferer::visit(const ir::operation::Shape &op) assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Sin &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Sin::Input::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::Slice &op) { const auto input_index{op.getInputs().at(ir::operation::Slice::Input::INPUT)}; @@ -1003,17 +934,6 @@ void DynamicShapeInferer::visit(const ir::operation::StridedSlice &op) assert(output->buffer() != nullptr); } -void DynamicShapeInferer::visit(const ir::operation::Sub &op) -{ - handleBinaryArithmeticOp(op, op.getInputs().at(ir::operation::Sub::Input::LHS), - op.getInputs().at(ir::operation::Sub::Input::RHS)); -} - -void DynamicShapeInferer::visit(const ir::operation::Tanh &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::Tanh::INPUT)); -} - void DynamicShapeInferer::visit(const ir::operation::Tile &op) { auto output_ind = op.getOutputs().at(0); @@ -1091,10 +1011,5 @@ void DynamicShapeInferer::visit(const ir::operation::Unpack &op) } } -void DynamicShapeInferer::visit(const ir::operation::ZerosLike &op) -{ - handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::ZerosLike::INPUT)); -} - } // namespace exec } // namespace onert diff --git a/runtime/onert/core/src/exec/Execution.cc b/runtime/onert/core/src/exec/Execution.cc index 5b401ecf8..7feb3ab68 100644 --- a/runtime/onert/core/src/exec/Execution.cc +++ b/runtime/onert/core/src/exec/Execution.cc @@ -38,7 +38,10 @@ void Execution::changeInputShape(const ir::IOIndex &index, const ir::Shape &new_ if (_io_desc.inputs.at(index.value()) != 0) throw std::runtime_error("Error in calling order"); - _io_desc.input_shape_signature[index] = new_shape; + // This will be used later to set the input tensor dynamic + // Note that the 'compiled' model will not be updated with new_shape; + // new_shape changes the model's input shape only while 'running' the model + _io_desc.dynamic_input_shapes[index] = new_shape; } // TODO Remove default parameter @@ -54,8 +57,8 @@ void Execution::setInput(const ir::IOIndex &index, const void *buffer, size_t le // if input_shape_sig is set, input_shape_sig overrides shape in info // note: input_shape_sig contains shape passed by nnfw_set_input_tensorinfo() { - auto input_shape_sig = _io_desc.input_shape_signature.find(index); - auto size_required = (input_shape_sig != _io_desc.input_shape_signature.end()) + auto input_shape_sig = _io_desc.dynamic_input_shapes.find(index); + auto size_required = (input_shape_sig != _io_desc.dynamic_input_shapes.end()) ? 
input_shape_sig->second.num_elements() * onert::ir::sizeOfDataType(info.typeInfo().type()) : info.total_size(); @@ -154,8 +157,8 @@ bool Execution::isFinished(void) const { return finished; } ir::Shape Execution::getInputShape(ir::IOIndex ind) const { - auto itr = _io_desc.input_shape_signature.find(ind); - if (itr == _io_desc.input_shape_signature.end()) + auto itr = _io_desc.dynamic_input_shapes.find(ind); + if (itr == _io_desc.dynamic_input_shapes.end()) { auto operand_idx = primary_subgraph().getInputs().at(ind.value()); return primary_subgraph().operands().at(operand_idx).shape(); diff --git a/runtime/onert/core/src/exec/ExecutorBase.cc b/runtime/onert/core/src/exec/ExecutorBase.cc index 864ccb31a..f835a9675 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.cc +++ b/runtime/onert/core/src/exec/ExecutorBase.cc @@ -26,12 +26,14 @@ namespace onert namespace exec { -ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph, +ExecutorBase::ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders) + const compiler::TensorRegistries &tensor_regs, + backend::TensorManagerSet &&tensor_mgrs) : _lowered_graph{std::move(lowered_graph)}, _graph{_lowered_graph->graph()}, - _input_tensors{input_tensors}, _output_tensors{output_tensors}, _mutex() + _input_tensors{input_tensors}, _output_tensors{output_tensors}, + _tensor_mgrs{std::move(tensor_mgrs)}, _mutex() { // TODO Fix the way of knowing whether it is primary or not bool primary_executor = !(_input_tensors.empty() && _output_tensors.empty()); @@ -41,23 +43,10 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph, std::vector<std::shared_ptr<backend::ITensor>> list; for (auto ind : ind_seq) { - std::shared_ptr<backend::ITensor> tensor; - for (auto &tensor_builder : tensor_builders) - { - auto tensor_registry = tensor_builder->tensorRegistry(); - assert(tensor_registry); - tensor = tensor_registry->getNativeITensor(ind); - if (tensor != nullptr) - { - if (tensor_builder->supportDynamicTensor()) - { - DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()}; - _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); - } - break; - } - } + std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind); assert(tensor != nullptr); + DynAllocInfo dyn_alloc_info{ind}; + _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); list.push_back(tensor); } return list; @@ -66,23 +55,10 @@ ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph, std::vector<std::shared_ptr<backend::ITensor>> list; for (auto ind : ind_seq) { - std::shared_ptr<backend::ITensor> tensor; - for (auto &tensor_builder : tensor_builders) - { - auto tensor_registry = tensor_builder->tensorRegistry(); - assert(tensor_registry); - tensor = tensor_registry->getNativeITensor(ind); - if (tensor != nullptr) - { - if (tensor_builder->supportDynamicTensor()) - { - DynAllocInfo dyn_alloc_info{ind, tensor_builder->dynamicTensorManager()}; - _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); - } - break; - } - } + std::shared_ptr<backend::ITensor> tensor = tensor_regs.getITensor(ind); assert(tensor != nullptr); + DynAllocInfo dyn_alloc_info{ind}; + _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); list.push_back(tensor); } return list; @@ -92,42 +68,23 @@ 
ExecutorBase::ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph, } else { - // If primary graph, all the inputs and outputs belong to controlflow backend - auto cf_dyn_tensor_builder = tensor_builders.getControlflowTensorBuilder(); - assert(cf_dyn_tensor_builder); - assert(input_tensors.size() == _graph.getInputs().size()); assert(output_tensors.size() == _graph.getOutputs().size()); for (uint32_t i = 0; i < input_tensors.size(); i++) { auto tensor = input_tensors[i]; auto ind = _graph.getInputs().at(i); - DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()}; + DynAllocInfo dyn_alloc_info{ind}; _input_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); } for (uint32_t i = 0; i < output_tensors.size(); i++) { auto tensor = output_tensors[i]; auto ind = _graph.getOutputs().at(i); - DynAllocInfo dyn_alloc_info{ind, cf_dyn_tensor_builder->dynamicTensorManager()}; + DynAllocInfo dyn_alloc_info{ind}; _output_to_dyn_alloc_info.emplace(tensor, dyn_alloc_info); } } - - // Prepare each TensorManager on each backend - for (auto &tensor_builder : tensor_builders) - { - auto s_tensor_manager = tensor_builder->releaseStaticTensorManager(); - if (s_tensor_manager != nullptr) - _tensor_mgrs.insert(std::move(s_tensor_manager)); - - if (tensor_builder->supportDynamicTensor()) - { - auto d_tensor_manager = tensor_builder->releaseDynamicTensorManager(); - if (d_tensor_manager != nullptr) - _tensor_mgrs.insert(std::move(d_tensor_manager)); - } - } } void ExecutorBase::execute(const std::vector<std::shared_ptr<backend::ITensor>> &src_tensors, @@ -192,8 +149,8 @@ void ExecutorBase::execute(const IODescription &desc) // TODO Remove dynamic_cast auto tensor = std::dynamic_pointer_cast<backend::controlflow::UserTensor>(_input_tensors[i]); assert(tensor); - auto input_shape = desc.input_shape_signature.find(ir::IOIndex{i}); - if (input_shape != desc.input_shape_signature.end()) + auto input_shape = desc.dynamic_input_shapes.find(ir::IOIndex{i}); + if (input_shape != desc.dynamic_input_shapes.end()) { tensor->set_dynamic(); tensor->setShape(input_shape->second); @@ -258,8 +215,8 @@ void ExecutorBase::execute(const IODescription &desc) */ void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescription &desc) { - auto shape_sig_found = desc.input_shape_signature.find(io_ind); - if (shape_sig_found != desc.input_shape_signature.end()) + auto shape_sig_found = desc.dynamic_input_shapes.find(io_ind); + if (shape_sig_found != desc.dynamic_input_shapes.end()) { auto dyn_alloc_info = _input_to_dyn_alloc_info.find(_input_tensors[io_ind.value()]); if (dyn_alloc_info == _input_to_dyn_alloc_info.end()) @@ -269,7 +226,9 @@ void ExecutorBase::handleDynamicInputTensor(ir::IOIndex io_ind, const IODescript auto changed_input_shape = shape_sig_found->second; auto operand_ind = dyn_alloc_info->second.ind; - dyn_alloc_info->second.dyn_tensor_manager->applyShape(operand_ind, changed_input_shape); + auto dyn_tensor_manager = _input_tensors[io_ind.value()]->dynamic_tensor_manager(); + assert(dyn_tensor_manager); + dyn_tensor_manager->applyShape(operand_ind, changed_input_shape); } } diff --git a/runtime/onert/core/src/exec/ExecutorBase.h b/runtime/onert/core/src/exec/ExecutorBase.h index 080c9bbdd..a13be7dbf 100644 --- a/runtime/onert/core/src/exec/ExecutorBase.h +++ b/runtime/onert/core/src/exec/ExecutorBase.h @@ -25,7 +25,7 @@ #include "Sink.h" #include "ShapeConverter.h" #include "exec/IExecutor.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" #include 
"ir/LowerInfoMap.h" #include "backend/IConfig.h" #include "backend/Backend.h" @@ -33,9 +33,8 @@ #include "exec/IFunction.h" #include "backend/IDynamicTensorManager.h" #include "backend/ITensorManager.h" -#include "backend/ITensorBuilder.h" #include "exec/ExecutionObservee.h" -#include "compiler/TensorBuilders.h" +#include "compiler/TensorRegistries.h" #include <list> namespace onert @@ -51,10 +50,11 @@ public: * @param graph Graph object * @param tensor_builders Tensor builders that are currently used */ - ExecutorBase(std::unique_ptr<ir::LoweredGraph> &&lowered_graph, + ExecutorBase(std::unique_ptr<compiler::LoweredGraph> &&lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders); + const compiler::TensorRegistries &tensor_regs, + backend::TensorManagerSet &&tensor_mgrs); virtual ~ExecutorBase() = default; @@ -102,7 +102,7 @@ protected: protected: ExecutionObservee _subject; std::shared_ptr<ir::OperationIndexMap<int64_t>> _indexed_ranks; - std::unique_ptr<ir::LoweredGraph> _lowered_graph; + std::unique_ptr<compiler::LoweredGraph> _lowered_graph; const ir::Graph &_graph; std::vector<std::shared_ptr<backend::ITensor>> _input_tensors; std::vector<std::shared_ptr<backend::ITensor>> _output_tensors; diff --git a/runtime/onert/core/src/exec/FunctionSequence.cc b/runtime/onert/core/src/exec/FunctionSequence.cc index d413e8162..fb31f7582 100644 --- a/runtime/onert/core/src/exec/FunctionSequence.cc +++ b/runtime/onert/core/src/exec/FunctionSequence.cc @@ -28,7 +28,8 @@ namespace exec void FunctionSequence::run() { - if (_enable_dynamic_shape_inferer) + // TODO Find out when `_enable_dynamic_shape_inferer` is true but `_dynamic_tensor_ctx` is false + if (_enable_dynamic_shape_inferer && _dynamic_tensor_ctx) { if (_dynamic_tensor_ctx->op_seq->size() != _functions.size()) throw std::runtime_error("operation and functions should be mapped one by one"); diff --git a/runtime/onert/core/src/exec/LinearExecutor.h b/runtime/onert/core/src/exec/LinearExecutor.h index 5c099bc16..c224d3f4f 100644 --- a/runtime/onert/core/src/exec/LinearExecutor.h +++ b/runtime/onert/core/src/exec/LinearExecutor.h @@ -46,12 +46,14 @@ public: * @param tensor_builders Tensor builders that are currently used * @param code_map OpSequence and its code map */ - LinearExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph, + LinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map, + const compiler::TensorRegistries &tensor_regs, + backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map, const std::vector<ir::OpSequenceIndex> &order) - : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders} + : ExecutorBase{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, + std::move(tensor_mgrs)} { for (auto index : order) { diff --git a/runtime/onert/core/src/exec/ParallelExecutor.cc b/runtime/onert/core/src/exec/ParallelExecutor.cc index b5d81778f..ab234aacd 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.cc +++ b/runtime/onert/core/src/exec/ParallelExecutor.cc @@ -60,12 +60,13 @@ void ParallelExecutor::notify(uint32_t finished_job_id) } ParallelExecutor::ParallelExecutor( - 
std::unique_ptr<ir::LoweredGraph> lowered_graph, + std::unique_ptr<compiler::LoweredGraph> lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map) - : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_builders, - std::move(code_map)} + const compiler::TensorRegistries &tensor_regs, backend::TensorManagerSet &&tensor_mgrs, + compiler::CodeMap &&code_map) + : DataflowExecutor{std::move(lowered_graph), input_tensors, output_tensors, tensor_regs, + std::move(tensor_mgrs), std::move(code_map)} { VERBOSE(ParallelExecutor) << "Constructing Parallel Executor" << std::endl; } diff --git a/runtime/onert/core/src/exec/ParallelExecutor.h b/runtime/onert/core/src/exec/ParallelExecutor.h index 462cbc6a8..929edfce9 100644 --- a/runtime/onert/core/src/exec/ParallelExecutor.h +++ b/runtime/onert/core/src/exec/ParallelExecutor.h @@ -50,10 +50,11 @@ public: * @param tensor_builders Tensor builders that are currently used * @param code_map OpSequence and its code map */ - ParallelExecutor(std::unique_ptr<ir::LoweredGraph> lowered_graph, + ParallelExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph, const std::vector<std::shared_ptr<backend::ITensor>> &input_tensors, const std::vector<std::shared_ptr<backend::ITensor>> &output_tensors, - const compiler::TensorBuilders &tensor_builders, compiler::CodeMap &&code_map); + const compiler::TensorRegistries &tensor_regs, + backend::TensorManagerSet &&tensor_mgrs, compiler::CodeMap &&code_map); void executeImpl() override; diff --git a/runtime/onert/core/src/exec/feature/nchw/Reader.h b/runtime/onert/core/src/exec/feature/nchw/Reader.h index 48642d8ef..7be9df4d5 100644 --- a/runtime/onert/core/src/exec/feature/nchw/Reader.h +++ b/runtime/onert/core/src/exec/feature/nchw/Reader.h @@ -33,7 +33,7 @@ namespace feature namespace nchw { -template <typename T> class Reader final : public feature::Reader<T> +template <typename T> class Reader : public feature::Reader<T> { public: // Construct for buffer of model inputs @@ -68,15 +68,14 @@ public: } public: - T at(uint32_t ch, uint32_t row, uint32_t col) const override + T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const final { - const auto offset = feature_index_to_byte_offset(0, ch, row, col); - - const T *ptr = reinterpret_cast<const T *>(_ptr + offset); - - return *ptr; + return getRef(batch, ch, row, col); } - T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override + T at(uint32_t ch, uint32_t row, uint32_t col) const final { return getRef(0, ch, row, col); } + +protected: + const T &getRef(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const { const auto offset = feature_index_to_byte_offset(batch, ch, row, col); diff --git a/runtime/onert/core/src/exec/feature/nchw/View.h b/runtime/onert/core/src/exec/feature/nchw/View.h index ff55de199..dbaf1a91e 100644 --- a/runtime/onert/core/src/exec/feature/nchw/View.h +++ b/runtime/onert/core/src/exec/feature/nchw/View.h @@ -17,7 +17,7 @@ #ifndef __ONERT_EXEC_FEATURE_NCHW_VIEW_H__ #define __ONERT_EXEC_FEATURE_NCHW_VIEW_H__ -#include "../Reader.h" +#include "Reader.h" #include "backend/ITensor.h" #include "ir/Shape.h" @@ -34,99 +34,31 @@ namespace feature namespace nchw { -template <typename T> class View final : public feature::Reader<T> +template <typename T> class View final : public Reader<T> { public: // 
Construct for buffer of model inputs - View(const ir::FeatureShape &shape, T *ptr, size_t len) - : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len} + View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len} { - assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len); - - _strides.W = sizeof(T); - _strides.H = shape.W * sizeof(T); - _strides.C = shape.W * shape.H * sizeof(T); - _strides.N = shape.W * shape.H * shape.C * sizeof(T); + // DO NOTHING } // Construct for backend tensor - View(::onert::backend::ITensor *tensor) - : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()} - { - assert(tensor->layout() == ir::Layout::NCHW); - - const auto start_offset = tensor->calcOffset({0, 0, 0, 0}); - _strides.W = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset; - _strides.H = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset; - _strides.C = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset; - _strides.N = tensor->dimension(0) == 1 ? 0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset; - - _shape.W = tensor->dimension(3); - _shape.H = tensor->dimension(2); - _shape.C = tensor->dimension(1); - _shape.N = tensor->dimension(0); - } - -public: - T at(uint32_t ch, uint32_t row, uint32_t col) const override + View(::onert::backend::ITensor *tensor) : Reader<T>{tensor} { - const auto offset = feature_index_to_byte_offset(0, ch, row, col); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; - } - T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override - { - const auto offset = feature_index_to_byte_offset(batch, ch, row, col); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; + // DO NOTHING } public: - T &at(uint32_t ch, uint32_t row, uint32_t col) - { - const auto offset = feature_index_to_byte_offset(0, ch, row, col); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; - } + using Reader<T>::at; T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { - const auto offset = feature_index_to_byte_offset(batch, ch, row, col); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; + return const_cast<T &>(Reader<T>::getRef(batch, ch, row, col)); } - -private: - size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const + T &at(uint32_t ch, uint32_t row, uint32_t col) { - assert(1u * _shape.N > batch); // shape.N > batch - assert(1u * _shape.C > ch); // shape.C > ch - assert(1u * _shape.H > row); // shape.H > row - assert(1u * _shape.W > col); // shape.W > col - - uint32_t res = 0; - res += batch * _strides.N; - res += ch * _strides.C; - res += row * _strides.H; - res += col * _strides.W; - - return res; + return const_cast<T &>(Reader<T>::getRef(0, ch, row, col)); } - -private: - // TODO Remove _shape - ir::FeatureShape _shape; - using Strides = ir::FeatureShape; - Strides _strides; - uint8_t *_ptr; - size_t _len; }; } // namespace nchw diff --git a/runtime/onert/core/src/exec/feature/nhwc/Reader.h b/runtime/onert/core/src/exec/feature/nhwc/Reader.h index ef27992c3..7730cee72 100644 --- a/runtime/onert/core/src/exec/feature/nhwc/Reader.h +++ b/runtime/onert/core/src/exec/feature/nhwc/Reader.h @@ -34,7 +34,7 @@ namespace feature namespace nhwc { -template <typename T> class Reader final : public feature::Reader<T> +template <typename T> class Reader : public feature::Reader<T> { public: // Construct for 
buffer of model inputs @@ -70,15 +70,14 @@ public: } public: - T at(uint32_t row, uint32_t col, uint32_t ch) const override + T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const final { - const auto offset = feature_index_to_byte_offset(0, row, col, ch); - - const T *ptr = reinterpret_cast<const T *>(_ptr + offset); - - return *ptr; + return getRef(batch, row, col, ch); } - T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override + T at(uint32_t row, uint32_t col, uint32_t ch) const final { return getRef(0, row, col, ch); } + +protected: + const T &getRef(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const { const auto offset = feature_index_to_byte_offset(batch, row, col, ch); diff --git a/runtime/onert/core/src/exec/feature/nhwc/View.h b/runtime/onert/core/src/exec/feature/nhwc/View.h index a09961a84..72c8c3415 100644 --- a/runtime/onert/core/src/exec/feature/nhwc/View.h +++ b/runtime/onert/core/src/exec/feature/nhwc/View.h @@ -35,101 +35,31 @@ namespace feature namespace nhwc { -template <typename T> class View final : public feature::Reader<T> +template <typename T> class View final : public Reader<T> { public: // Construct for buffer of model inputs - View(const ir::FeatureShape &shape, T *ptr, size_t len) - : _shape{shape}, _ptr{reinterpret_cast<uint8_t *>(ptr)}, _len{len} + View(const ir::FeatureShape &shape, T *ptr, size_t len) : Reader<T>{shape, ptr, len} { - UNUSED_RELEASE(len); // Workaround for unused variable in release mode - assert(shape.N * shape.H * shape.W * shape.C * sizeof(T) == len); - - // No padding - _strides.C = sizeof(T); - _strides.W = shape.C * sizeof(T); - _strides.H = shape.C * shape.W * sizeof(T); - _strides.N = shape.C * shape.W * shape.H * sizeof(T); + // DO NOTHING } // Construct for backend tensor - View(backend::ITensor *tensor) - : _ptr{tensor->buffer() + tensor->calcOffset({0, 0, 0, 0})}, _len{tensor->total_size()} + View(backend::ITensor *tensor) : Reader<T>{tensor} { - assert(tensor->layout() == ir::Layout::NHWC); - - const auto start_offset = tensor->calcOffset({0, 0, 0, 0}); - _strides.C = tensor->dimension(3) == 1 ? 0 : tensor->calcOffset({0, 0, 0, 1}) - start_offset; - _strides.W = tensor->dimension(2) == 1 ? 0 : tensor->calcOffset({0, 0, 1, 0}) - start_offset; - _strides.H = tensor->dimension(1) == 1 ? 0 : tensor->calcOffset({0, 1, 0, 0}) - start_offset; - _strides.N = tensor->dimension(0) == 1 ? 
0 : tensor->calcOffset({1, 0, 0, 0}) - start_offset; - - _shape.C = tensor->dimension(3); - _shape.W = tensor->dimension(2); - _shape.H = tensor->dimension(1); - _shape.N = tensor->dimension(0); + // DO NOTHING } public: - T at(uint32_t row, uint32_t col, uint32_t ch) const override - { - const auto offset = feature_index_to_byte_offset(0, row, col, ch); - - const T *ptr = reinterpret_cast<const T *>(_ptr + offset); - - return *ptr; - } - T at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const override - { - const auto offset = feature_index_to_byte_offset(batch, row, col, ch); - - const T *ptr = reinterpret_cast<const T *>(_ptr + offset); - - return *ptr; - } - - T &at(uint32_t row, uint32_t col, uint32_t ch) - { - const auto offset = feature_index_to_byte_offset(0, row, col, ch); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; - } - + using Reader<T>::at; T &at(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) { - const auto offset = feature_index_to_byte_offset(batch, row, col, ch); - - T *ptr = reinterpret_cast<T *>(_ptr + offset); - - return *ptr; + return const_cast<T &>(Reader<T>::getRef(batch, row, col, ch)); } - -private: - size_t feature_index_to_byte_offset(uint32_t batch, uint32_t row, uint32_t col, uint32_t ch) const + T &at(uint32_t row, uint32_t col, uint32_t ch) { - assert(1u * _shape.N > batch); // shape.N > batch - assert(1u * _shape.H > row); // shape.H > row - assert(1u * _shape.W > col); // shape.W > col - assert(1u * _shape.C > ch); // shape.C > ch - - uint32_t res = 0; - res += batch * _strides.N; - res += row * _strides.H; - res += col * _strides.W; - res += ch * _strides.C; - - return res; + return const_cast<T &>(Reader<T>::getRef(0, row, col, ch)); } - -private: - // TODO Remove _shape - ir::FeatureShape _shape; - using Strides = ir::FeatureShape; - Strides _strides; - uint8_t *_ptr; - size_t _len; }; } // namespace nhwc diff --git a/runtime/onert/core/src/interp/InterpOps.lst b/runtime/onert/core/src/interp/InterpOps.lst index 5f646b83f..0714df38a 100644 --- a/runtime/onert/core/src/interp/InterpOps.lst +++ b/runtime/onert/core/src/interp/InterpOps.lst @@ -22,43 +22,32 @@ // // Same list with Operations.lst // Make comment out if operation is not supported in interpreter -INTERP_OP(Add) -INTERP_OP(Sub) +INTERP_OP(BinaryArithmetic) //INTERP_OP(BatchToSpaceND) //INTERP_OP(Cast) INTERP_OP(Conv2D) INTERP_OP(DepthwiseConv2D) -INTERP_OP(AvgPool2D) -INTERP_OP(MaxPool2D) +INTERP_OP(Pool2D) INTERP_OP(Concat) INTERP_OP(FullyConnected) //INTERP_OP(Reduce) INTERP_OP(Reshape) -INTERP_OP(Mul) INTERP_OP(Softmax) //INTERP_OP(Squeeze) //INTERP_OP(Slice) //INTERP_OP(StridedSlice) -INTERP_OP(Tanh) -INTERP_OP(Logistic) -//INTERP_OP(Div) +INTERP_OP(ElementwiseActivation) //INTERP_OP(Transpose) //INTERP_OP(Exp) //INTERP_OP(Comparison) -//INTERP_OP(LogicalAnd) -//INTERP_OP(LogicalOr) //INTERP_OP(LogicalNot) //INTERP_OP(LSTM) //INTERP_OP(RSQRT) -INTERP_OP(ReLU) //INTERP_OP(ResizeBilinear) -INTERP_OP(ReLU1) -INTERP_OP(ReLU6) //INTERP_OP(RNN) //INTERP_OP(Floor) //INTERP_OP(SpaceToBatchND) //INTERP_OP(SpaceToDepth) -//INTERP_OP(L2Pool2D) //INTERP_OP(EmbeddingLookup) //INTERP_OP(L2Normalization) //INTERP_OP(HashtableLookup) @@ -81,6 +70,4 @@ INTERP_OP(Gather) INTERP_OP(Pad) //INTERP_OP(Custom) //INTERP_OP(Permute) -//INTERP_OP(Min) -//INTERP_OP(Max) //INTERP_OP(OneHot) diff --git a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc index 44c955421..86e883524 100644 --- 
a/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc +++ b/runtime/onert/core/src/interp/operations/BinaryArithmeticOps.cc @@ -19,9 +19,7 @@ #include "OperationUtil.h" #include "interp/Registration.h" -#include "ir/operation/Add.h" -#include "ir/operation/Sub.h" -#include "ir/operation/Mul.h" +#include "ir/operation/BinaryArithmetic.h" #include "misc/polymorphic_downcast.h" #include "cker/Types.h" @@ -39,12 +37,13 @@ enum class OpType MUL }; -template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation &node) +void prepare(ExecEnv *env, const ir::Operation &node) { - const auto &add_node = nnfw::misc::polymorphic_downcast<const node_type &>(node); + const auto &arithmetic_node = + nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node); - const auto lhs_index = node.getInputs().at(add_node.LHS); - const auto rhs_index = node.getInputs().at(add_node.RHS); + const auto lhs_index = node.getInputs().at(arithmetic_node.LHS); + const auto rhs_index = node.getInputs().at(arithmetic_node.RHS); const auto out_index = node.getOutputs().at(0); const auto lhs_tensor = env->tensorAt(lhs_index); @@ -54,7 +53,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation // TODO Util function to compare TensorInfo if (lhs_tensor->data_type() != rhs_tensor->data_type()) { - throw std::runtime_error{"Interp(Add): Different input types"}; + throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Different input types"}; } bool try_broadcast = (lhs_tensor->tensorInfo().shape() != rhs_tensor->tensorInfo().shape()); @@ -65,7 +64,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation rhs_tensor->tensorInfo().shape(), success); if (!success) { - throw std::runtime_error{"Interp(Add): Failed to broadcast"}; + throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Failed to broadcast"}; } auto output_info = @@ -86,7 +85,7 @@ template <typename node_type> void prepareAdd(ExecEnv *env, const ir::Operation // TODO Util function to compare TensorInfo if (lhs_tensor->data_type() != out_tensor->data_type()) { - throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"}; + throw std::runtime_error{"Interp(" + arithmetic_node.name() + "): Invalid output type"}; } } @@ -103,9 +102,9 @@ inline void setActivationParams(int32_t min, int32_t max, params->quantized_activation_max = max; } -template <typename raw_type, typename param_type, OpType op_type> +template <typename raw_type, OpType op_type> void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor *out_tensor, - const param_type &param) + const ir::operation::BinaryArithmetic::Param &param) { const auto lhs_buffer = lhs_tensor->bufferRO(); const auto rhs_buffer = rhs_tensor->bufferRO(); @@ -146,13 +145,11 @@ void invoke(const ITensor *lhs_tensor, const ITensor *rhs_tensor, const ITensor out_shape, out_ptr); } -template <typename node_type, typename param_type, OpType op_type> -void invokeAdd(const ExecEnv *env, const ir::Operation &node) +template <OpType op_type> +void invokeBinaryArithmetic(const ExecEnv *env, const ir::operation::BinaryArithmetic &node) { - const auto &arithmetic_node = nnfw::misc::polymorphic_downcast<const node_type &>(node); - - const auto lhs_index = node.getInputs().at(arithmetic_node.LHS); - const auto rhs_index = node.getInputs().at(arithmetic_node.RHS); + const auto lhs_index = node.getInputs().at(node.LHS); + const auto rhs_index = node.getInputs().at(node.RHS); const auto out_index = 
node.getOutputs().at(0); const auto lhs_tensor = env->tensorAt(lhs_index); const auto rhs_tensor = env->tensorAt(rhs_index); @@ -161,38 +158,46 @@ void invokeAdd(const ExecEnv *env, const ir::Operation &node) if (data_type == ir::DataType::INT32) { - invoke<int32_t, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, - arithmetic_node.param()); + invoke<int32_t, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param()); } else if (data_type == ir::DataType::FLOAT32) { - invoke<float, param_type, op_type>(lhs_tensor, rhs_tensor, out_tensor, arithmetic_node.param()); + invoke<float, op_type>(lhs_tensor, rhs_tensor, out_tensor, node.param()); } else { throw std::runtime_error{"NYI: Unsupported data type"}; } } -} // namespace -OpKernel *getAdd() +void invokeBinaryArithmeticOps(const ExecEnv *env, const ir::Operation &node) { - static OpKernel kernel = {prepareAdd<ir::operation::Add>, - invokeAdd<ir::operation::Add, ir::operation::Add::Param, OpType::ADD>}; - return &kernel; -} + const auto &arithmetic_node = + nnfw::misc::polymorphic_downcast<const ir::operation::BinaryArithmetic &>(node); -OpKernel *getSub() -{ - static OpKernel kernel = {prepareAdd<ir::operation::Sub>, - invokeAdd<ir::operation::Sub, ir::operation::Sub::Param, OpType::SUB>}; - return &kernel; + switch (arithmetic_node.param().arithmetic_type) + { + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + invokeBinaryArithmetic<OpType::ADD>(env, arithmetic_node); + break; + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + invokeBinaryArithmetic<OpType::SUB>(env, arithmetic_node); + break; + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + invokeBinaryArithmetic<OpType::MUL>(env, arithmetic_node); + break; + default: + throw std::runtime_error{"Interp(BinaryArithmetic): NYI unsupported operation " + + arithmetic_node.name()}; + break; + } } -OpKernel *getMul() +} // namespace + +OpKernel *getBinaryArithmetic() { - static OpKernel kernel = {prepareAdd<ir::operation::Mul>, - invokeAdd<ir::operation::Mul, ir::operation::Mul::Param, OpType::MUL>}; + static OpKernel kernel = {prepare, invokeBinaryArithmeticOps}; return &kernel; } diff --git a/runtime/onert/core/src/interp/operations/UnaryActivations.cc b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc index ea5e2417b..c8773bef4 100644 --- a/runtime/onert/core/src/interp/operations/UnaryActivations.cc +++ b/runtime/onert/core/src/interp/operations/ElementwiseActivations.cc @@ -20,10 +20,11 @@ #include "interp/Registration.h" -#include "ir/operation/ReLU.h" -#include "ir/operation/ReLU1.h" -#include "ir/operation/ReLU6.h" -#include "ir/operation/Tanh.h" +#include "ir/operation/ElementwiseActivation.h" + +#include <misc/polymorphic_downcast.h> +#include <cker/operation/Logistic.h> +#include <cker/operation/Tanh.h> namespace onert { @@ -34,9 +35,8 @@ namespace enum class ActivationType { + Logistic, ReLU, - ReLU1, - ReLU6, Tanh }; @@ -65,30 +65,25 @@ void prepare(ExecEnv *env, const ir::Operation &node) // TODO Util function to compare TensorInfo if (input_tensor->data_type() != output_tensor->data_type()) { - throw std::runtime_error{"Interp(Activations): Invalid output type"}; + throw std::runtime_error{"Interp(ElementwiseActivation): Invalid output type"}; } } template <ActivationType act_type> -void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements) +void evalFloat(const float *input_ptr, float *output_ptr, uint64_t num_elements, float alpha, + float beta) { std::function<float(const float &)> fn = 
[](const float &) { return std::nanf(""); }; switch (act_type) { case ActivationType::ReLU: - fn = [](const float &in) { return std::max(0.f, in); }; - break; - case ActivationType::ReLU1: - fn = [](const float &in) { return std::min(std::max(-1.f, in), 1.f); }; - break; - case ActivationType::ReLU6: - fn = [](const float &in) { return std::min(std::max(0.f, in), 6.f); }; + fn = [alpha, beta](const float &in) { return std::min(std::max(beta, in), alpha); }; break; case ActivationType::Tanh: fn = [](const float &in) { return std::tanh(in); }; break; default: - throw std::runtime_error{"Interp(Activations): NYI - Unsupported activation"}; + throw std::runtime_error{"Interp(ElementwiseActivation): NYI - Unsupported activation"}; break; } @@ -114,38 +109,51 @@ template <ActivationType act_type> void invoke(const ExecEnv *env, const ir::Ope uint64_t elements = input_tensor->num_elements(); const float *input_start = reinterpret_cast<const float *>(input_tensor->bufferRO()); float *out = reinterpret_cast<float *>(output_tensor->buffer()); - - evalFloat<act_type>(input_start, out, elements); + if (act_type == ActivationType::Logistic) + { + const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); + const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); + nnfw::cker::Logistic(cker_input_shape, input_start, cker_output_shape, out); + } + else + { + const auto &act_node = + nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node); + evalFloat<act_type>(input_start, out, elements, act_node.param().alpha, + act_node.param().beta); + } } else { - throw std::runtime_error{"Interp(ReLU6): NYI - Support float only"}; + throw std::runtime_error{"Interp(" + node.name() + "): NYI - Support float only"}; } } -} // namespace - -OpKernel *getReLU() +void invokeElementwiseActivation(const ExecEnv *env, const ir::Operation &node) { - static OpKernel kernel = {prepare, invoke<ActivationType::ReLU>}; - return &kernel; -} - -OpKernel *getReLU1() -{ - static OpKernel kernel = {prepare, invoke<ActivationType::ReLU1>}; - return &kernel; + const auto &act_node = + nnfw::misc::polymorphic_downcast<const ir::operation::ElementwiseActivation &>(node); + switch (act_node.param().op_type) + { + case ir::operation::ElementwiseActivation::Type::LOGISTIC: + invoke<ActivationType::Logistic>(env, node); + break; + case ir::operation::ElementwiseActivation::Type::RELU: + invoke<ActivationType::ReLU>(env, node); + break; + case ir::operation::ElementwiseActivation::Type::TANH: + invoke<ActivationType::Tanh>(env, node); + break; + default: + throw std::runtime_error("Interp(" + node.name() + "): NYI - Unsupported activation"); + } } -OpKernel *getReLU6() -{ - static OpKernel kernel = {prepare, invoke<ActivationType::ReLU6>}; - return &kernel; -} +} // namespace -OpKernel *getTanh() +OpKernel *getElementwiseActivation() { - static OpKernel kernel = {prepare, invoke<ActivationType::Tanh>}; + static OpKernel kernel = {prepare, invokeElementwiseActivation}; return &kernel; } diff --git a/runtime/onert/core/src/interp/operations/Logistic.cc b/runtime/onert/core/src/interp/operations/Logistic.cc deleted file mode 100644 index c23cbb782..000000000 --- a/runtime/onert/core/src/interp/operations/Logistic.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <cker/operation/Logistic.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/Logistic.h" - -namespace onert -{ -namespace interp -{ -namespace -{ - -void prepareLogistic(ExecEnv *env, const ir::Operation &node) -{ - const auto input_index = node.getInputs().at(0); - const auto output_index = node.getOutputs().at(0); - - const auto input_tensor = env->tensorAt(input_index); - - const auto output_info = env->graph().operands().at(output_index).info(); - - // Check shape and type lhs is same with rhs - // TODO Util function to compare TensorInfo - if (output_info.total_size() == 0) - { - throw std::runtime_error{"Interp(TConv): NYI unspecified output shape"}; - } - else - { - env->allocateIfNeeded(output_index, output_info); - } - - const auto output_tensor = env->tensorAt(output_index); - if (input_tensor->data_type() != output_tensor->data_type()) - { - throw std::runtime_error{"Interp(Logistic): Invalid output type"}; - } -} - -void invoke(const ITensor *input_tensor, const ITensor *output_tensor) -{ - const auto input_buffer = input_tensor->bufferRO(); - auto output_buffer = output_tensor->buffer(); - - const auto cker_input_shape = convertShape(input_tensor->tensorInfo().shape()); - const auto cker_output_shape = convertShape(output_tensor->tensorInfo().shape()); - const float *input_ptr = reinterpret_cast<const float *>(input_buffer); - float *output_ptr = reinterpret_cast<float *>(output_buffer); - - nnfw::cker::Logistic(cker_input_shape, input_ptr, cker_output_shape, output_ptr); -} - -void invokeLogistic(const ExecEnv *env, const ir::Operation &node) -{ - const auto input_index = node.getInputs().at(0); - const auto output_index = node.getOutputs().at(0); - - const auto input_tensor = env->tensorAt(input_index); - const auto output_tensor = env->tensorAt(output_index); - - const auto data_type = input_tensor->data_type(); - - if (data_type == ir::DataType::FLOAT32) - { - invoke(input_tensor, output_tensor); - } - else - { - throw std::runtime_error{"Interp(Logistic): NYI - Unsupported data type"}; - } -} -} // namespace - -OpKernel *getLogistic() -{ - static OpKernel kernel = {prepareLogistic, invokeLogistic}; - return &kernel; -} - -} // namespace interp -} // namespace onert diff --git a/runtime/onert/core/src/interp/operations/MaxPool2D.cc b/runtime/onert/core/src/interp/operations/MaxPool2D.cc deleted file mode 100644 index 313948fb6..000000000 --- a/runtime/onert/core/src/interp/operations/MaxPool2D.cc +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <cker/operation/MaxPool.h> - -#include "OperationUtil.h" - -#include "interp/Registration.h" -#include "ir/operation/MaxPool2D.h" -#include "util/Utils.h" -#include "util/ShapeInference.h" -#include "misc/polymorphic_downcast.h" - -namespace onert -{ -namespace interp -{ -namespace -{ - -void prepareMaxPool2D(ExecEnv *env, const ir::Operation &node) -{ - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - const auto in_tensor = env->tensorAt(in_index); - - assert(in_tensor->num_dimensions() == 4); - UNUSED_RELEASE(in_tensor); - - const auto output_info = env->graph().operands().at(out_index).info(); - if (output_info.total_size() == 0) - { - // Handle unspecified output shape - const auto &maxpool_node = - nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node); - const auto infered_output_shape = - shape_inference::inferMaxPoolShape(in_tensor->tensorInfo().shape(), maxpool_node.param()); - env->allocateIfNeeded( - out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo())); - } - else - { - env->allocateIfNeeded(out_index, output_info); - } - - auto out_tensor = env->tensorAt(out_index); - UNUSED_RELEASE(out_tensor); - - // Handle same ifm & ofm data type only - assert(in_tensor->data_type() == out_tensor->data_type()); - assert(out_tensor->num_dimensions() == 4); -} - -void invoke(const ITensor *in_tensor, const ITensor *out_tensor, - const ir::operation::MaxPool2D::Param &param) -{ - // TODO support NCHW frontend - const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); - const auto padding = - ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh); - // Calculate - nnfw::cker::PoolParams cker_param; - calculateActivationRange(param.activation, &cker_param.float_activation_min, - &cker_param.float_activation_max); - cker_param.filter_width = param.kw; - cker_param.filter_height = param.kh; - cker_param.padding_values.width = padding.left; - cker_param.padding_values.height = padding.top; - cker_param.stride_width = param.stride.horizontal; - cker_param.stride_height = param.stride.vertical; - - const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); - const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); - const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); - float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); - - nnfw::cker::MaxPool(cker_param, in_shape, in_ptr, out_shape, out_ptr); -} - -void invokeMaxPool2D(const ExecEnv *env, const ir::Operation &node) -{ - const auto &maxpool_node = - nnfw::misc::polymorphic_downcast<const ir::operation::MaxPool2D &>(node); - - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - const auto in_tensor = env->tensorAt(in_index); - const auto out_tensor = env->tensorAt(out_index); - - const auto data_type = in_tensor->data_type(); - if (data_type == ir::DataType::FLOAT32) - { - invoke(in_tensor, out_tensor, maxpool_node.param()); - } - else - { - throw std::runtime_error{"NYI: Support float32 only"}; - } -} -} // namespace - -OpKernel *getMaxPool2D() -{ - static OpKernel kernel = {prepareMaxPool2D, invokeMaxPool2D}; - return &kernel; -} - -} // namespace interp -} // 
namespace onert diff --git a/runtime/onert/core/src/interp/operations/AvgPool2D.cc b/runtime/onert/core/src/interp/operations/Pool2D.cc index 42fe42301..92f9d70b2 100644 --- a/runtime/onert/core/src/interp/operations/AvgPool2D.cc +++ b/runtime/onert/core/src/interp/operations/Pool2D.cc @@ -15,11 +15,12 @@ */ #include <cker/operation/AveragePool.h> +#include <cker/operation/MaxPool.h> #include "OperationUtil.h" #include "interp/Registration.h" -#include "ir/operation/AvgPool2D.h" +#include "ir/operation/Pool2D.h" #include "util/Utils.h" #include "util/ShapeInference.h" #include "misc/polymorphic_downcast.h" @@ -28,12 +29,13 @@ namespace onert { namespace interp { -namespace avgpool2d +namespace pool2d { -void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node) +void preparePool2D(ExecEnv *env, const ir::Operation &node) { - const auto in_index = node.getInputs().at(0); + const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node); + const auto in_index = node.getInputs().at(pool_node.INPUT); const auto out_index = node.getOutputs().at(0); const auto in_tensor = env->tensorAt(in_index); @@ -45,10 +47,8 @@ void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node) if (output_info.total_size() == 0) { // Handle unspecified output shape - const auto &avgpool_node = - nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node); const auto infered_output_shape = - shape_inference::inferAvgPoolShape(in_tensor->tensorInfo().shape(), avgpool_node.param()); + shape_inference::inferPoolShape(in_tensor->tensorInfo().shape(), pool_node.param()); env->allocateIfNeeded( out_index, ir::OperandInfo::createStaticInfo(infered_output_shape, output_info.typeInfo())); } @@ -65,18 +65,44 @@ void prepareAvgPool2D(ExecEnv *env, const ir::Operation &node) assert(out_tensor->num_dimensions() == 4); } -void invoke(const ITensor *in_tensor, const ITensor *out_tensor, - const ir::operation::AvgPool2D::Param &param) +template <typename T> +void invoke(const nnfw::cker::PoolParams &params, const nnfw::cker::Shape &in_shape, + const T *in_ptr, const nnfw::cker::Shape &out_shape, T *out_ptr, + ir::operation::Pool2D::PoolType op_type) { - // TODO Support NCHW frontend + switch (op_type) + { + case ir::operation::Pool2D::PoolType::AVG: + nnfw::cker::AveragePool<T>(params, in_shape, in_ptr, out_shape, out_ptr); + break; + case ir::operation::Pool2D::PoolType::MAX: + nnfw::cker::MaxPool<T>(params, in_shape, in_ptr, out_shape, out_ptr); + break; + default: + throw std::runtime_error{"Interp(Pool2D): NYI unsupported operation"}; + break; + } +} + +void invokePool2DOps(const ExecEnv *env, const ir::Operation &node) +{ + const auto &pool_node = nnfw::misc::polymorphic_downcast<const ir::operation::Pool2D &>(node); + + const auto in_index = node.getInputs().at(0); + const auto out_index = node.getOutputs().at(0); + + // Get input and output tensors + const auto in_tensor = env->tensorAt(in_index); + const auto out_tensor = env->tensorAt(out_index); + + // TODO support NCHW frontend const auto ifm_shape = in_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); const auto ofm_shape = out_tensor->tensorInfo().shape().asFeature(ir::Layout::NHWC); + const auto param = pool_node.param(); const auto padding = ir::calculatePadding(param.padding, ifm_shape, ofm_shape, param.stride, param.kw, param.kh); // Calculate nnfw::cker::PoolParams cker_param; - calculateActivationRange(param.activation, &cker_param.float_activation_min, - &cker_param.float_activation_max); 
cker_param.filter_width = param.kw; cker_param.filter_height = param.kh; cker_param.padding_values.width = padding.left; @@ -84,41 +110,29 @@ void invoke(const ITensor *in_tensor, const ITensor *out_tensor, cker_param.stride_width = param.stride.horizontal; cker_param.stride_height = param.stride.vertical; - const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); - const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); - const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); - float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); - - nnfw::cker::AveragePool(cker_param, in_shape, in_ptr, out_shape, out_ptr); -} - -void invokeAvgPool2D(const ExecEnv *env, const ir::Operation &node) -{ - const auto &avgpool_node = - nnfw::misc::polymorphic_downcast<const ir::operation::AvgPool2D &>(node); - - const auto in_index = node.getInputs().at(0); - const auto out_index = node.getOutputs().at(0); - - // Check lhs shape is same with rhs (with broadcast) - const auto in_tensor = env->tensorAt(in_index); - const auto out_tensor = env->tensorAt(out_index); - const auto data_type = in_tensor->data_type(); if (data_type == ir::DataType::FLOAT32) { - invoke(in_tensor, out_tensor, avgpool_node.param()); + calculateActivationRange(param.activation, &cker_param.float_activation_min, + &cker_param.float_activation_max); + + const auto in_shape = convertShape(in_tensor->tensorInfo().shape()); + const auto out_shape = convertShape(out_tensor->tensorInfo().shape()); + const float *in_ptr = reinterpret_cast<const float *>(in_tensor->bufferRO()); + float *out_ptr = reinterpret_cast<float *>(out_tensor->buffer()); + // Now, invoke() supports only Pool2D in float + invoke<float>(cker_param, in_shape, in_ptr, out_shape, out_ptr, param.op_type); } else { throw std::runtime_error{"NYI: Support float only"}; } } -} // namespace avgpool2d +} // namespace pool2d -OpKernel *getAvgPool2D() +OpKernel *getPool2D() { - static OpKernel kernel = {avgpool2d::prepareAvgPool2D, avgpool2d::invokeAvgPool2D}; + static OpKernel kernel = {pool2d::preparePool2D, pool2d::invokePool2DOps}; return &kernel; } diff --git a/runtime/onert/core/src/interp/operations/Softmax.cc b/runtime/onert/core/src/interp/operations/Softmax.cc index 6d9359e1e..d30f78deb 100644 --- a/runtime/onert/core/src/interp/operations/Softmax.cc +++ b/runtime/onert/core/src/interp/operations/Softmax.cc @@ -29,43 +29,6 @@ namespace interp namespace { -void Softmax2D(const float *in, const int input_size, const int batch_size, const float beta, - float *out) -{ - assert(input_size > 0); - - // For each batch - for (int b = 0; b < batch_size; b++) - { - // Find the max coeff. - float max_coeff = in[0]; - for (int i = 1; i < input_size; i++) - { - if (in[i] > max_coeff) - max_coeff = in[i]; - } - - // Compute the normalized sum of exps. - float exp_sum = 0.0; - for (int i = 0; i < input_size; i++) - { - out[i] = std::exp((in[i] - max_coeff) * beta); - exp_sum += out[i]; - } - - // Divide by the sum of exps. - float reciprocal_sum_exp = 1.f / exp_sum; - for (int i = 0; i < input_size; i++) - { - out[i] *= reciprocal_sum_exp; - } - - // Advance in and out pointers for the next batch. 
- in += input_size; - out += input_size; - } -} - void prepareSoftMax(ExecEnv *env, const ir::Operation &node) { const auto in_index = node.getInputs().at(0); @@ -108,7 +71,7 @@ void invoke(const ITensor *in_tensor, const ITensor *out_tensor, uint32_t batch_size = in_tensor->dimension(0); uint32_t input_size = in_tensor->dimension(1); - Softmax2D(in_ptr, input_size, batch_size, beta, out_ptr); + nnfw::cker::Softmax(in_ptr, input_size, batch_size, beta, out_ptr); } else if (in_tensor->num_dimensions() == 4) { diff --git a/runtime/onert/core/src/ir/Graph.cc b/runtime/onert/core/src/ir/Graph.cc index 0db9b6133..fe8b1b443 100644 --- a/runtime/onert/core/src/ir/Graph.cc +++ b/runtime/onert/core/src/ir/Graph.cc @@ -56,18 +56,34 @@ void Graph::setOperandValue(const OperandIndex &ind, std::shared_ptr<Data> data) _operands.at(ind).data(std::move(data)); } -void Graph::addInput(const OperandIndex &ind) +void Graph::addInput(const OperandIndex &ind, const std::string &name) { assert(isBuildingPhase()); + if (!name.empty()) + _name_to_input.emplace(name, IOIndex{_inputs.size()}); _inputs.append(ind); } -void Graph::addOutput(const OperandIndex &ind) +void Graph::addOutput(const OperandIndex &ind, const std::string &name) { assert(isBuildingPhase()); + if (!name.empty()) + _name_to_output.emplace(name, IOIndex{_outputs.size()}); _outputs.append(ind); } +IOIndex Graph::getInputIndex(const std::string &name) const +{ + auto itr = _name_to_input.find(name); + return (itr == _name_to_input.end()) ? IOIndex{} : itr->second; +} + +IOIndex Graph::getOutputIndex(const std::string &name) const +{ + auto itr = _name_to_output.find(name); + return (itr == _name_to_output.end()) ? IOIndex{} : itr->second; +} + void Graph::finishBuilding(void) { assert(isBuildingPhase()); diff --git a/runtime/onert/core/src/ir/GraphIterator.cc b/runtime/onert/core/src/ir/GraphIterator.cc index 2b29a9ea9..4bea1a55d 100644 --- a/runtime/onert/core/src/ir/GraphIterator.cc +++ b/runtime/onert/core/src/ir/GraphIterator.cc @@ -17,7 +17,7 @@ #include "GraphIterator.h" #include "ir/OperationIndexMap.h" -#include "ir/LoweredGraph.h" +#include "compiler/LoweredGraph.h" namespace onert { diff --git a/runtime/onert/core/src/ir/GraphIterator.h b/runtime/onert/core/src/ir/GraphIterator.h index 534ffef80..b54314e0e 100644 --- a/runtime/onert/core/src/ir/GraphIterator.h +++ b/runtime/onert/core/src/ir/GraphIterator.h @@ -23,12 +23,19 @@ namespace onert { +namespace compiler +{ +class LoweredGraph; +} // namespace compiler +} // namespace onert + +namespace onert +{ namespace ir { class Graph; class Operation; -class LoweredGraph; class OpSequence; template <bool is_const> class Iterator @@ -65,7 +72,8 @@ public: using NodeRef = typename Iterator<is_const>::NodeRef; using IterFn = typename Iterator<is_const>::IterFn; using LoweredGraphRef = - typename std::conditional<is_const, const LoweredGraph &, LoweredGraph &>::type; + typename std::conditional<is_const, const typename compiler::LoweredGraph &, + typename compiler::LoweredGraph &>::type; using OpSequenceRef = typename std::conditional<is_const, const OpSequence &, OpSequence &>::type; using OpSeqIndexRef = const OpSequenceIndex &; using OpSeqIterFn = std::function<void(OpSeqIndexRef, OpSequenceRef)>; diff --git a/runtime/onert/core/src/ir/OpSequences.cc b/runtime/onert/core/src/ir/OpSequences.cc index a87d31a9f..68884783e 100644 --- a/runtime/onert/core/src/ir/OpSequences.cc +++ b/runtime/onert/core/src/ir/OpSequences.cc @@ -83,15 +83,6 @@ OpSequenceIndex OpSequences::getOperation(const 
OperationIndex &operation_index) return ret; } -// TODO: Extract this into external helper function -void OpSequences::dump(const std::string &msg, const Operations &operations) const -{ - VERBOSE(OpSequences) << "OpSequences(" << msg << ")" << std::endl; - iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) { - VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl; - }); -} - void OpSequences::removeFromOpSequence(const OperationIndex &operation_index) { const auto op_seq_index = findOperation(operation_index); @@ -122,5 +113,12 @@ OpSequenceIndex OpSequences::findOperation(const OperationIndex &operation_index throw std::runtime_error("Operation not found"); } +void dumpOpSequences(const OpSequences &op_seqs, const Operations &operations) +{ + op_seqs.iterate([&](const OpSequenceIndex &idx, const OpSequence &op_seq) { + VERBOSE(OpSequences) << idx.value() << "] " << getStrFromOpSeq(op_seq, operations) << std::endl; + }); +} + } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/OperationDumper.cc b/runtime/onert/core/src/ir/OperationDumper.cc index e3cbce57a..48361f464 100644 --- a/runtime/onert/core/src/ir/OperationDumper.cc +++ b/runtime/onert/core/src/ir/OperationDumper.cc @@ -27,206 +27,137 @@ namespace ir using namespace operation; -OperationDumper::OperationDumper(const std::string &start_msg) +namespace { - VERBOSE(LIR) << start_msg << std::endl; -} - -void OperationDumper::visit(const Abs &node) +void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "") { - VERBOSE(LIR) << "* Abs" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Abs::Input::INPUT) << ")" + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Add &node) +void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "") { - VERBOSE(LIR) << "* Add" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Add::Input::LHS) << ", " - << node.getInputs().at(Add::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const ArgMax &node) -{ - VERBOSE(LIR) << "* ArgMax" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ArgMax::Input::INPUT) << ")" - << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; + VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(1) + << ") " << adding_input << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const AvgPool2D &node) +void dumpConvOp(const Operation &node, const std::string &padding_type) { - VERBOSE(LIR) << "* AvgPool2D(Implicit)" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(AvgPool2D::Input::INPUT) << ")" - << std::endl; + VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel(" + << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias(" + << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl; VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void 
OperationDumper::visit(const BatchToSpaceND &node) +void dumpPackingOp(const Operation &node) { - VERBOSE(LIR) << "* BatchToSpaceND" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(BatchToSpaceND::Input::INPUT) << ")" - << " BlockSize(" << node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE) << ")" - << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; + std::string inputs; + for (auto i : node.getInputs()) + { + inputs += std::to_string(i.value()) + ","; + } + VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } +} // namespace -void OperationDumper::visit(const operation::BroadcastTo &node) +OperationDumper::OperationDumper(const std::string &start_msg) { - VERBOSE(LIR) << "* BroadcastTo" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(BroadcastTo::Input::INPUT) << ", " - << node.getInputs().at(BroadcastTo::Input::SHAPE) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + VERBOSE(LIR) << start_msg << std::endl; } -void OperationDumper::visit(const Cast &node) -{ - VERBOSE(LIR) << "* Cast" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cast::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ArgMax &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const Comparison &node) +void OperationDumper::visit(const BatchToSpaceND &node) { - VERBOSE(LIR) << "* Comparison" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Comparison::Input::INPUT0) << ", " - << node.getInputs().at(Comparison::Input::INPUT1) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string block_size = + "BlockSize(" + + std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) + ")"; + dumpUnaryInputOp(node, block_size); } -void OperationDumper::visit(const Concat &node) -{ - VERBOSE(LIR) << "* Concat" << std::endl; - std::string inputs; - for (auto i : node.getInputs()) - { - inputs += std::to_string(i.value()) + ","; - } - VERBOSE(LIR) << " - Inputs : IFM(" << inputs << ")" << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); } + +void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); } + +void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); } + +void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); } void OperationDumper::visit(const Conv2D &node) { std::string padding_type = node.param().padding.type == PaddingType::EXPLICIT ? 
"Explicit" : "Implicit"; - VERBOSE(LIR) << "* Conv2D(" << padding_type << ")" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Conv2D::Input::INPUT) << ") Kernel(" - << node.getInputs().at(Conv2D::Input::KERNEL) << ") Bias(" - << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; + dumpConvOp(node, padding_type); } -void OperationDumper::visit(const ConvertFp16ToFp32 &node) -{ - VERBOSE(LIR) << "* ConvertFp16ToFp32" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ConvertFp16ToFp32::Input::INPUT) - << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const ConvertFp32ToFp16 &node) -{ - VERBOSE(LIR) << "* ConvertFp32ToFp16" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ConvertFp32ToFp16::Input::INPUT) - << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Cos &node) -{ - VERBOSE(LIR) << "* Cos" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Cos::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const DepthToSpace &node) -{ - VERBOSE(LIR) << "* DepthToSpace" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(DepthToSpace::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const DepthwiseConv2D &node) { std::string padding_type = node.param().padding.type == PaddingType::EXPLICIT ? 
"Explicit" : "Implicit"; - VERBOSE(LIR) << "* DepthwiseConv2D(" << padding_type << ")" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(DepthwiseConv2D::Input::INPUT) - << ") Kernel(" << node.getInputs().at(DepthwiseConv2D::Input::KERNEL) << ") Bias(" - << node.getInputs().at(DepthwiseConv2D::Input::BIAS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; + dumpConvOp(node, padding_type); } -void OperationDumper::visit(const Dequantize &node) +void OperationDumper::visit(const ElementwiseActivation &node) { - VERBOSE(LIR) << "* Dequantize" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Dequantize::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string params; + if (node.param().op_type == ElementwiseActivation::Type::RELU) + { + params = " lower value(" + std::to_string(node.param().alpha) + ") upper value(" + + std::to_string(node.param().beta) + ")"; + } + else if (node.param().op_type == ElementwiseActivation::Type::LEAKY_RELU) + { + params = " alpha value(" + std::to_string(node.param().alpha) + ")"; + } + dumpUnaryInputOp(node, params); } -void OperationDumper::visit(const Div &node) -{ - VERBOSE(LIR) << "* Div" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Div::Input::LHS) << ", " - << node.getInputs().at(Div::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); } + +void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const EmbeddingLookup &node) { - VERBOSE(LIR) << "* EmbeddingLookup" << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; VERBOSE(LIR) << " - Inputs : Lookups(" << node.getInputs().at(EmbeddingLookup::Input::LOOKUPS) << ") VALUES(" << node.getInputs().at(EmbeddingLookup::Input::VALUES) << ")" << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Exp &node) -{ - VERBOSE(LIR) << "* Exp" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Exp::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - void OperationDumper::visit(const ExpandDims &node) { - VERBOSE(LIR) << "* ExpandDims" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ExpandDims::Input::INPUT) - << ") AXIS(" << node.getInputs().at(ExpandDims::Input::AXIS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Floor &node) -{ - VERBOSE(LIR) << "* Floor" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Floor::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string axis = + "AXIS(" + std::to_string(node.getInputs().at(ExpandDims::Input::AXIS).value()) + ")"; + dumpUnaryInputOp(node, axis); } void OperationDumper::visit(const FullyConnected &node) { - VERBOSE(LIR) << "* FullyConnected" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(FullyConnected::Input::INPUT) - << ") Weight(" << node.getInputs().at(FullyConnected::Input::WEIGHT) << 
") Bias(" - << node.getInputs().at(FullyConnected::Input::BIAS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; + std::string inputs = + "Weight(" + std::to_string(node.getInputs().at(FullyConnected::Input::WEIGHT).value()) + + ") Bias(" + std::to_string(node.getInputs().at(FullyConnected::Input::BIAS).value()) + ")"; + dumpUnaryInputOp(node, inputs); } void OperationDumper::visit(const Gather &node) { - VERBOSE(LIR) << "* Gather" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Gather::Input::INPUT) << ") Indices(" - << node.getInputs().at(Gather::Input::INDICES) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string indices = + "Indices(" + std::to_string(node.getInputs().at(Gather::Input::INDICES).value()) + ")"; + dumpUnaryInputOp(node, indices); } void OperationDumper::visit(const HashtableLookup &node) @@ -242,36 +173,15 @@ void OperationDumper::visit(const HashtableLookup &node) void OperationDumper::visit(const InstanceNorm &node) { - VERBOSE(LIR) << "* InstanceNorm" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(InstanceNorm::Input::INPUT) - << ") Gamma(" << node.getInputs().at(InstanceNorm::Input::GAMMA) << ") Beta(" - << node.getInputs().at(InstanceNorm::Input::BETA) << ")" << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const L2Normalization &node) -{ - VERBOSE(LIR) << "* L2Normalization" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Normalization::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string inputs = + "Gamma(" + std::to_string(node.getInputs().at(InstanceNorm::Input::GAMMA).value()) + + ") Beta(" + std::to_string(node.getInputs().at(InstanceNorm::Input::BETA).value()) + ")"; + dumpUnaryInputOp(node, inputs); } -void OperationDumper::visit(const L2Pool2D &node) -{ - VERBOSE(LIR) << "* L2Pool2D" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(L2Pool2D::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const LocalResponseNormalization &node) -{ - VERBOSE(LIR) << "* LocalResponseNormalization" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" - << node.getInputs().at(LocalResponseNormalization::Input::INPUT) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const LSTM &node) { @@ -307,93 +217,12 @@ void OperationDumper::visit(const LSTM &node) << node.getInputs().at(LSTM::Output::OUTPUT) << ")" << std::endl; } -void OperationDumper::visit(const Log &node) -{ - VERBOSE(LIR) << "* Log" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Log::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const LogicalAnd &node) -{ - VERBOSE(LIR) << "* LogicalAnd" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalAnd::Input::INPUT0) << ", " - << 
node.getInputs().at(LogicalAnd::Input::INPUT1) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const LogicalNot &node) -{ - VERBOSE(LIR) << "* LogicalNot" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalNot::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const LogicalOr &node) -{ - VERBOSE(LIR) << "* LogicalOr" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(LogicalOr::Input::INPUT0) << ", " - << node.getInputs().at(LogicalOr::Input::INPUT1) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Logistic &node) -{ - VERBOSE(LIR) << "* Logistic" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Logistic::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const MaxPool2D &node) -{ - std::string padding_type = - node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; - VERBOSE(LIR) << "* MaxPool2D(" << padding_type << ")" << std::endl; - VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(MaxPool2D::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Mul &node) -{ - VERBOSE(LIR) << "* Mul" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Mul::Input::LHS) << ", " - << node.getInputs().at(Mul::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Neg &node) -{ - VERBOSE(LIR) << "* Neg" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Neg::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Pack &node) -{ - VERBOSE(LIR) << "* Pack" << std::endl; - std::string inputs; - const auto &input_indices = node.getInputs(); - for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) - { - inputs += std::to_string(it->value()); - if (std::next(it) != std::end(input_indices)) - inputs += ", "; - } - VERBOSE(LIR) << " - Inputs : Inputs(" << inputs << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); } void OperationDumper::visit(const Pad &node) { - VERBOSE(LIR) << "* Pad" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Pad::Input::INPUT) << ") Pad(" - << node.getInputs().at(Pad::Input::PAD) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string pad = "Pad(" + std::to_string(node.getInputs().at(Pad::Input::PAD).value()) + ")"; + dumpUnaryInputOp(node, pad); } void OperationDumper::visit(const Permute &node) @@ -417,86 +246,46 @@ void OperationDumper::visit(const Permute &node) VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Pow &node) +void OperationDumper::visit(const Pool2D &node) { - VERBOSE(LIR) 
<< "* Pow" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Pow::Input::LHS) << ", " - << node.getInputs().at(Pow::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const PReLU &node) -{ - VERBOSE(LIR) << "* PReLU" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(PReLU::Input::INPUT) << ") Alpha(" - << node.getInputs().at(PReLU::Input::ALPHA) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Reduce &node) -{ - VERBOSE(LIR) << "* " + node.name() << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reduce::Input::INPUT) << ")" + std::string padding_type = + node.param().padding.type == PaddingType::EXPLICIT ? "Explicit" : "Implicit"; + VERBOSE(LIR) << "* " << node.name() << "(" << padding_type << ")" << std::endl; + VERBOSE(LIR) << " - Inputs : IFM(" << node.getInputs().at(Pool2D::Input::INPUT) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const ReLU &node) -{ - VERBOSE(LIR) << "* ReLU" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); } -void OperationDumper::visit(const ReLU1 &node) +void OperationDumper::visit(const PReLU &node) { - VERBOSE(LIR) << "* ReLU1" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU1::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string alpha = + "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")"; + dumpUnaryInputOp(node, alpha); } -void OperationDumper::visit(const ReLU6 &node) -{ - VERBOSE(LIR) << "* ReLU6" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ReLU6::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); } + +void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const Reshape &node) { - VERBOSE(LIR) << "* Reshape" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reshape::Input::INPUT) << ")"; // optional param - if (node.getInputs().size() == 2) - { - VERBOSE(LIR) << " Shape(" << node.getInputs().at(Reshape::Input::SHAPE) << ")"; - } - else - { - VERBOSE(LIR) << " Shape(not provided)"; - } - VERBOSE(LIR) << std::endl; - - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string shape = + node.getInputs().size() == 2 + ? 
"Shape(" + std::to_string(node.getInputs().at(Reshape::Input::SHAPE).value()) + ")" + : "Shape(not provided)"; + dumpUnaryInputOp(node, shape); } -void OperationDumper::visit(const ResizeBilinear &node) -{ - VERBOSE(LIR) << "* ResizeBilinear" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ResizeBilinear::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ResizeBilinear &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const Reverse &node) { - VERBOSE(LIR) << "* Reverse" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Reverse::Input::INPUT) << ") Axis(" - << node.getInputs().at(Reverse::Input::AXIS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string axis = + "Axis(" + std::to_string(node.getInputs().at(Reverse::Input::AXIS).value()) + ")"; + dumpUnaryInputOp(node, axis); } void OperationDumper::visit(const RNN &node) @@ -512,162 +301,65 @@ void OperationDumper::visit(const RNN &node) << std::endl; } -void OperationDumper::visit(const Round &node) -{ - VERBOSE(LIR) << "* Round" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Round::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - void OperationDumper::visit(const Range &node) { VERBOSE(LIR) << "* Range" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Range::Input::START) << ")" + VERBOSE(LIR) << " - Inputs : Start(" << node.getInputs().at(Range::Input::START) << ")" << " Limit(" << node.getInputs().at(Range::Input::LIMIT) << ")" << " Delta(" << node.getInputs().at(Range::Input::DELTA) << ")" << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const RSQRT &node) -{ - VERBOSE(LIR) << "* RSQRT" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(RSQRT::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - void OperationDumper::visit(const Select &node) { VERBOSE(LIR) << "* Select" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Select::Input::CONDITION) << ")" + VERBOSE(LIR) << " - Inputs : Condition(" << node.getInputs().at(Select::Input::CONDITION) << ")" << " Input_X(" << node.getInputs().at(Select::Input::INPUT_TRUE) << ")" << " Input_Y(" << node.getInputs().at(Select::Input::INPUT_FALSE) << ")" << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const ir::operation::Shape &node) -{ - VERBOSE(LIR) << "* Shape" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ir::operation::Shape::Input::INPUT) - << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Sin &node) -{ - VERBOSE(LIR) << "* Sin" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sin::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const Softmax &node) -{ - VERBOSE(LIR) << "* 
Softmax" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Softmax::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const SpaceToBatchND &node) { - VERBOSE(LIR) << "* SpaceToBatchND" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToBatchND::Input::INPUT) - << ") BlockSize(" << node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE) - << ") Paddings(" << node.getInputs().at(SpaceToBatchND::Input::PADDINGS) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string inputs = + "BlockSize(" + + std::to_string(node.getInputs().at(SpaceToBatchND::Input::BLOCK_SIZE).value()) + + ") Paddings(" + std::to_string(node.getInputs().at(SpaceToBatchND::Input::PADDINGS).value()) + + ")"; + dumpUnaryInputOp(node, inputs); } -void OperationDumper::visit(const SpaceToDepth &node) -{ - VERBOSE(LIR) << "* SpaceToDepth" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SpaceToDepth::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const Split &node) -{ - VERBOSE(LIR) << "* Split" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Split::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const SQRT &node) -{ - VERBOSE(LIR) << "* SQRT" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SQRT::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Split &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const SquaredDifference &node) -{ - VERBOSE(LIR) << "* SquaredDifference" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(SquaredDifference::Input::LHS) - << ", " << node.getInputs().at(SquaredDifference::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); } void OperationDumper::visit(const StatelessRandomUniform &node) { VERBOSE(LIR) << "* StatelessRandomUniform" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE) - << ", " << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Squeeze &node) -{ - VERBOSE(LIR) << "* Squeeze" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Squeeze::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Slice &node) -{ - VERBOSE(LIR) << "* Slice" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Slice::Input::INPUT) << ")" + VERBOSE(LIR) << " - Inputs : Shape(" << node.getInputs().at(StatelessRandomUniform::Input::SHAPE) + << " 
Seed(" << node.getInputs().at(StatelessRandomUniform::Input::SEED) << ")" << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const StridedSlice &node) -{ - VERBOSE(LIR) << "* StridedSlice" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(StridedSlice::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const Sub &node) -{ - VERBOSE(LIR) << "* Sub" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Sub::Input::LHS) << ", " - << node.getInputs().at(Sub::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); } -void OperationDumper::visit(const Tanh &node) -{ - VERBOSE(LIR) << "* TanH" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tanh::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const StridedSlice &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const Tile &node) { - VERBOSE(LIR) << "* Tile" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Tile::Input::INPUT) << ", " - << node.getInputs().at(Tile::Input::MULTIPLES) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; + std::string multiples = + "Multiples(" + std::to_string(node.getInputs().at(Tile::Input::MULTIPLES).value()) + ")"; + dumpUnaryInputOp(node, multiples); } void OperationDumper::visit(const TopKV2 &node) @@ -692,17 +384,11 @@ void OperationDumper::visit(const TransposeConv &node) VERBOSE(LIR) << " - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl; } -void OperationDumper::visit(const Transpose &node) -{ - VERBOSE(LIR) << "* Transpose" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Transpose::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} +void OperationDumper::visit(const Transpose &node) { dumpUnaryInputOp(node); } void OperationDumper::visit(const Unpack &node) { - VERBOSE(LIR) << "* Unpack" << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")" << std::endl; std::string outputs; @@ -716,25 +402,9 @@ void OperationDumper::visit(const Unpack &node) VERBOSE(LIR) << " - Outputs : Outputs(" << outputs << ")" << std::endl; } -void OperationDumper::visit(const Min &node) -{ - VERBOSE(LIR) << "* Min" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Min::Input::LHS) << ", " - << node.getInputs().at(Min::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - -void OperationDumper::visit(const Max &node) -{ - VERBOSE(LIR) << "* Max" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(Max::Input::LHS) << ", " - << node.getInputs().at(Max::Input::RHS) << ")" << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - void OperationDumper::visit(const OneHot &node) { - 
VERBOSE(LIR) << "* OneHot" << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; VERBOSE(LIR) << " - Inputs : " << "Indices(" << node.getInputs().at(OneHot::Input::INDICES) << ") " << std::endl; VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; @@ -742,7 +412,7 @@ void OperationDumper::visit(const OneHot &node) void OperationDumper::visit(const If &node) { - VERBOSE(LIR) << "* If" << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; std::string inputs; const auto &input_indices = node.getInputs(); for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) @@ -767,7 +437,7 @@ void OperationDumper::visit(const If &node) void OperationDumper::visit(const While &node) { - VERBOSE(LIR) << "* While" << std::endl; + VERBOSE(LIR) << "* " << node.name() << std::endl; std::string inputs; const auto &input_indices = node.getInputs(); for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it) @@ -790,13 +460,5 @@ void OperationDumper::visit(const While &node) VERBOSE(LIR) << " - Output : Outputs(" << outputs << ")" << std::endl; } -void OperationDumper::visit(const ZerosLike &node) -{ - VERBOSE(LIR) << "* RoZerosLike" << std::endl; - VERBOSE(LIR) << " - Inputs : Input(" << node.getInputs().at(ZerosLike::Input::INPUT) << ")" - << std::endl; - VERBOSE(LIR) << " - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl; -} - } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/OperationDumper.h b/runtime/onert/core/src/ir/OperationDumper.h index d83f1493f..e8ab3b3cd 100644 --- a/runtime/onert/core/src/ir/OperationDumper.h +++ b/runtime/onert/core/src/ir/OperationDumper.h @@ -31,85 +31,61 @@ public: OperationDumper(const std::string &start_msg); public: - void visit(const operation::Abs &) override; - void visit(const operation::Add &node) override; void visit(const operation::ArgMax &) override; - void visit(const operation::AvgPool2D &node) override; void visit(const operation::BatchToSpaceND &node) override; + void visit(const operation::BinaryArithmetic &node) override; void visit(const operation::BroadcastTo &) override; - void visit(const operation::Cast &) override; void visit(const operation::Comparison &) override; void visit(const operation::Concat &node) override; void visit(const operation::Conv2D &node) override; void visit(const operation::ConvertFp16ToFp32 &node) override; void visit(const operation::ConvertFp32ToFp16 &node) override; - void visit(const operation::Cos &node) override; void visit(const operation::DepthToSpace &) override; void visit(const operation::DepthwiseConv2D &node) override; - void visit(const operation::Dequantize &) override; - void visit(const operation::Div &) override; + void visit(const operation::ElementwiseActivation &) override; + void visit(const operation::ElementwiseBinary &) override; + void visit(const operation::ElementwiseUnary &) override; void visit(const operation::EmbeddingLookup &) override; - void visit(const operation::Exp &) override; void visit(const operation::ExpandDims &) override; - void visit(const operation::Floor &) override; void visit(const operation::FullyConnected &node) override; void visit(const operation::Gather &) override; void visit(const operation::HashtableLookup &) override; void visit(const operation::InstanceNorm &) override; void visit(const operation::L2Normalization &) override; - void visit(const operation::L2Pool2D &) override; void visit(const operation::LocalResponseNormalization &) 
override; - void visit(const operation::Log &) override; - void visit(const operation::LogicalAnd &) override; - void visit(const operation::LogicalNot &) override; - void visit(const operation::LogicalOr &) override; - void visit(const operation::Logistic &) override; void visit(const operation::LSTM &) override; - void visit(const operation::MaxPool2D &node) override; - void visit(const operation::Mul &) override; - void visit(const operation::Neg &) override; void visit(const operation::Pack &) override; void visit(const operation::Pad &) override; void visit(const operation::Permute &node) override; + void visit(const operation::Pool2D &node) override; void visit(const operation::Pow &node) override; void visit(const operation::PReLU &) override; void visit(const operation::Range &) override; + void visit(const operation::Rank &) override; void visit(const operation::Reduce &) override; - void visit(const operation::ReLU &) override; - void visit(const operation::ReLU1 &) override; - void visit(const operation::ReLU6 &) override; void visit(const operation::Reshape &node) override; void visit(const operation::ResizeBilinear &) override; void visit(const operation::Reverse &) override; void visit(const operation::RNN &) override; - void visit(const operation::Round &) override; - void visit(const operation::RSQRT &) override; void visit(const operation::Select &node) override; void visit(const operation::Shape &node) override; - void visit(const operation::Sin &node) override; void visit(const operation::Softmax &node) override; void visit(const operation::SpaceToBatchND &) override; void visit(const operation::SpaceToDepth &) override; void visit(const operation::Split &) override; - void visit(const operation::SQRT &) override; void visit(const operation::SquaredDifference &) override; void visit(const operation::Squeeze &) override; void visit(const operation::Slice &) override; void visit(const operation::StridedSlice &) override; void visit(const operation::StatelessRandomUniform &) override; - void visit(const operation::Sub &) override; - void visit(const operation::Tanh &) override; void visit(const operation::Tile &) override; void visit(const operation::TopKV2 &) override; void visit(const operation::TransposeConv &) override; void visit(const operation::Transpose &) override; void visit(const operation::Unpack &) override; - void visit(const operation::Min &) override; - void visit(const operation::Max &) override; void visit(const operation::OneHot &) override; void visit(const operation::If &) override; void visit(const operation::While &) override; - void visit(const operation::ZerosLike &) override; }; } // namespace ir diff --git a/runtime/onert/core/src/ir/Padding.cc b/runtime/onert/core/src/ir/Padding.cc index 31969911f..d74f80217 100644 --- a/runtime/onert/core/src/ir/Padding.cc +++ b/runtime/onert/core/src/ir/Padding.cc @@ -50,7 +50,7 @@ inline ExplicitPadding validPadding(void) } inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const Stride &stride, - uint32_t kw, uint32_t kh) + uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf) { ExplicitPadding padding; @@ -61,14 +61,19 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const // padding_to_beginning = total_padding / 2 // padding_to_end = (total_padding + 1)/2. 
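+  // Worked example of the formula just below (hypothetical numbers): a kernel
+  // of kh = 3 with dilation dhf = 2 touches input rows {0, 2, 4}, i.e. an
+  // effective extent of (3 - 1) * 2 + 1 = 5, so SAME padding is derived from
+  // 5 rather than from the raw kernel size 3.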
// + const int32_t effective_filter_h_size = (kh - 1) * dhf + 1; + const int32_t effective_filter_w_size = (kw - 1) * dwf + 1; + const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical; const int32_t horizontal_expected_output = (ifm_shape.W + stride.horizontal - 1) / stride.horizontal; - const int32_t vertical_needed_input = (vertical_expected_output - 1) * stride.vertical + kh; + const int32_t vertical_needed_input = + (vertical_expected_output - 1) * stride.vertical + effective_filter_h_size; const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H); - const int32_t horizontal_needed_input = (horizontal_expected_output - 1) * stride.horizontal + kw; + const int32_t horizontal_needed_input = + (horizontal_expected_output - 1) * stride.horizontal + effective_filter_w_size; const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W); padding.top = vertical_total_padding / 2; @@ -80,7 +85,8 @@ inline ExplicitPadding samePaddingUsingIFM(const FeatureShape &ifm_shape, const } inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureShape &ofm_shape, - const Stride &stride, uint32_t kw, uint32_t kh) + const Stride &stride, uint32_t kw, uint32_t kh, uint32_t dwf, + uint32_t dhf) { const int32_t vertical_expected_output = (ifm_shape.H + stride.vertical - 1) / stride.vertical; const int32_t horizontal_expected_output = @@ -92,7 +98,7 @@ inline ExplicitPadding samePadding(const FeatureShape &ifm_shape, const FeatureS UNUSED_RELEASE(vertical_expected_output); UNUSED_RELEASE(horizontal_expected_output); - return samePaddingUsingIFM(ifm_shape, stride, kw, kh); + return samePaddingUsingIFM(ifm_shape, stride, kw, kh, dwf, dhf); } } // namespace @@ -130,7 +136,7 @@ Padding::Padding(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom) const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShape &ifm_shape, const FeatureShape &ofm_shape, const Stride &stride, - uint32_t kw, uint32_t kh) + uint32_t kw, uint32_t kh, uint32_t dwf, uint32_t dhf) { if (padding.type == PaddingType::EXPLICIT) { @@ -138,7 +144,7 @@ const ExplicitPadding calculatePadding(const Padding &padding, const FeatureShap } else if (padding.type == PaddingType::SAME) { - return samePadding(ifm_shape, ofm_shape, stride, kw, kh); + return samePadding(ifm_shape, ofm_shape, stride, kw, kh, dwf, dhf); } else if (padding.type == PaddingType::VALID) { diff --git a/runtime/onert/core/src/ir/operation/Abs.cc b/runtime/onert/core/src/ir/operation/Abs.cc deleted file mode 100644 index b06705d07..000000000 --- a/runtime/onert/core/src/ir/operation/Abs.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ir/operation/Abs.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Abs::accept(OperationVisitor &v) const { v.visit(*this); } - -Abs::Abs(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Add.cc b/runtime/onert/core/src/ir/operation/Add.cc deleted file mode 100644 index 2fa30f8ed..000000000 --- a/runtime/onert/core/src/ir/operation/Add.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Add.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Add::accept(OperationVisitor &v) const { v.visit(*this); } - -Add::Add(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/AvgPool2D.cc b/runtime/onert/core/src/ir/operation/AvgPool2D.cc deleted file mode 100644 index 28d4fcb54..000000000 --- a/runtime/onert/core/src/ir/operation/AvgPool2D.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/AvgPool2D.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void AvgPool2D::accept(OperationVisitor &v) const { v.visit(*this); } - -AvgPool2D::AvgPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Quantize.cc b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc index 0e3d5b69b..2b1422c73 100644 --- a/runtime/onert/core/src/ir/operation/Quantize.cc +++ b/runtime/onert/core/src/ir/operation/BinaryArithmetic.cc @@ -14,7 +14,10 @@ * limitations under the License. 
*/ -#include "ir/operation/Quantize.h" +#include "ir/operation/BinaryArithmetic.h" + +#include <cassert> +#include <unordered_map> #include "ir/OperationVisitor.h" @@ -25,11 +28,23 @@ namespace ir namespace operation { -void Quantize::accept(OperationVisitor &v) const { v.visit(*this); } +void BinaryArithmetic::accept(OperationVisitor &v) const { v.visit(*this); } + +BinaryArithmetic::BinaryArithmetic(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param ¶m) + : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} +{ +} -Quantize::Quantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} +std::string BinaryArithmetic::name() const { + using ArithmeticType = onert::ir::operation::BinaryArithmetic::ArithmeticType; + static const std::unordered_map<ArithmeticType, std::string> name_map{ + {ArithmeticType::ADD, std::string{"Add"}}, + {ArithmeticType::SUB, std::string{"Sub"}}, + {ArithmeticType::MUL, std::string{"Mul"}}, + {ArithmeticType::DIV, std::string{"Div"}}}; + return name_map.at(_param.arithmetic_type); } } // namespace operation diff --git a/runtime/onert/core/src/ir/operation/Cast.cc b/runtime/onert/core/src/ir/operation/Cast.cc deleted file mode 100644 index 09d9c327e..000000000 --- a/runtime/onert/core/src/ir/operation/Cast.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Cast.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Cast::accept(OperationVisitor &v) const { v.visit(*this); } - -Cast::Cast(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Dequantize.cc b/runtime/onert/core/src/ir/operation/Dequantize.cc deleted file mode 100644 index 14d6362bd..000000000 --- a/runtime/onert/core/src/ir/operation/Dequantize.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ir/operation/Dequantize.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Dequantize::accept(OperationVisitor &v) const { v.visit(*this); } - -Dequantize::Dequantize(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Div.cc b/runtime/onert/core/src/ir/operation/Div.cc deleted file mode 100644 index b095d9811..000000000 --- a/runtime/onert/core/src/ir/operation/Div.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Div.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Div::accept(OperationVisitor &v) const { v.visit(*this); } - -Div::Div(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc new file mode 100644 index 000000000..f6718b656 --- /dev/null +++ b/runtime/onert/core/src/ir/operation/ElementwiseActivation.cc @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/operation/ElementwiseActivation.h" + +#include <cassert> +#include <unordered_map> + +#include "ir/OperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +void ElementwiseActivation::accept(OperationVisitor &v) const { v.visit(*this); } + +ElementwiseActivation::ElementwiseActivation(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, + const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ + if (param.op_type == Type::LOGISTIC) + { + assert(param.alpha == 0.0f && param.beta == 0.0f && "Logistic will be supported only as " + "sigmoid function(L=1, k=1, x0=0). 
So alpha and beta must not be used");
+  }
+  else if (param.op_type == Type::RELU)
+  {
+    assert(param.alpha >= param.beta && "ReLU's alpha must be greater than or equal to beta");
+  }
+  else if (param.op_type == Type::TANH)
+  {
+    assert(param.alpha == 1.0f && param.beta == 1.0f && "f(x) = alpha * tanh(beta * x); Tanh is "
+                                                        "supported only when alpha and "
+                                                        "beta are 1.0f");
+  }
+}
+
+std::string ElementwiseActivation::name() const
+{
+  using ElementwiseActivationType = onert::ir::operation::ElementwiseActivation::Type;
+  static const std::unordered_map<Type, std::string> name_map{
+      {ElementwiseActivationType::ELU, "ELU"},
+      {ElementwiseActivationType::LOGISTIC, "Logistic"},
+      {ElementwiseActivationType::RELU, "ReLU"},
+      {ElementwiseActivationType::TANH, "Tanh"},
+      {ElementwiseActivationType::LEAKY_RELU, "LeakyRelu"}};
+  return name_map.at(_param.op_type);
+}
+
+float ElementwiseActivation::infinity = std::numeric_limits<float>::infinity();
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
new file mode 100644
index 000000000..3287fc0a3
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/ElementwiseBinary.h"
+
+#include <cassert>
+#include <unordered_map>
+
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+void ElementwiseBinary::accept(OperationVisitor &v) const { v.visit(*this); }
+
+ElementwiseBinary::ElementwiseBinary(const OperandIndexSequence &inputs,
+                                     const OperandIndexSequence &outputs, const Param &param)
+    : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param}
+{
+}
+
+std::string ElementwiseBinary::name() const
+{
+  using ElementwiseBinaryType = onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType;
+  static const std::unordered_map<ElementwiseBinaryType, std::string> name_map{
+      {ElementwiseBinaryType::LOGICAL_AND, std::string{"LogicalAnd"}},
+      {ElementwiseBinaryType::LOGICAL_OR, std::string{"LogicalOr"}},
+      {ElementwiseBinaryType::MAX, std::string{"Max"}},
+      {ElementwiseBinaryType::MIN, std::string{"Min"}}};
+  return name_map.at(_param.op_type);
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
new file mode 100644
index 000000000..7dfcd4a98
--- /dev/null
+++ b/runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/operation/ElementwiseUnary.h" + +#include <cassert> +#include <unordered_map> + +#include "ir/OperationVisitor.h" + +namespace onert +{ +namespace ir +{ +namespace operation +{ + +void ElementwiseUnary::accept(OperationVisitor &v) const { v.visit(*this); } + +ElementwiseUnary::ElementwiseUnary(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, const Param ¶m) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} +{ +} + +std::string ElementwiseUnary::name() const +{ + using ElementwiseUnaryType = onert::ir::operation::ElementwiseUnary::Type; + static const std::unordered_map<ElementwiseUnaryType, std::string> name_map{ + {ElementwiseUnaryType::ABS, std::string{"Abs"}}, + {ElementwiseUnaryType::CAST, std::string{"Cast"}}, + {ElementwiseUnaryType::COS, std::string{"Cos"}}, + {ElementwiseUnaryType::DEQUANTIZE, std::string{"Dequantize"}}, + {ElementwiseUnaryType::ERF, std::string{"Erf"}}, + {ElementwiseUnaryType::EXP, std::string{"Exp"}}, + {ElementwiseUnaryType::FLOOR, std::string{"Floor"}}, + {ElementwiseUnaryType::LOG, std::string{"Log"}}, + {ElementwiseUnaryType::LOGICAL_NOT, std::string{"LogicalNot"}}, + {ElementwiseUnaryType::NEG, std::string{"Neg"}}, + {ElementwiseUnaryType::QUANTIZE, std::string{"Quantize"}}, + {ElementwiseUnaryType::ROUND, std::string{"Round"}}, + {ElementwiseUnaryType::RSQRT, std::string{"RSqrt"}}, + {ElementwiseUnaryType::SIN, std::string{"Sin"}}, + {ElementwiseUnaryType::SQRT, std::string{"Sqrt"}}, + {ElementwiseUnaryType::SQURE, std::string{"Squre"}}, + {ElementwiseUnaryType::ZEROS_LIKE, std::string{"ZerosLike"}}}; + return name_map.at(_param.op_type); +} + +} // namespace operation +} // namespace ir +} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Exp.cc b/runtime/onert/core/src/ir/operation/Exp.cc deleted file mode 100644 index 0b22e080a..000000000 --- a/runtime/onert/core/src/ir/operation/Exp.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ir/operation/Exp.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Exp::accept(OperationVisitor &v) const { v.visit(*this); } - -Exp::Exp(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Floor.cc b/runtime/onert/core/src/ir/operation/Floor.cc deleted file mode 100644 index dc01535ad..000000000 --- a/runtime/onert/core/src/ir/operation/Floor.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Floor.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Floor::accept(OperationVisitor &v) const { v.visit(*this); } - -Floor::Floor(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/L2Pool2D.cc b/runtime/onert/core/src/ir/operation/L2Pool2D.cc deleted file mode 100644 index 8f21b93e0..000000000 --- a/runtime/onert/core/src/ir/operation/L2Pool2D.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/L2Pool2D.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void L2Pool2D::accept(OperationVisitor &v) const { v.visit(*this); } - -L2Pool2D::L2Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param ¶m) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/LogicalAnd.cc b/runtime/onert/core/src/ir/operation/LogicalAnd.cc deleted file mode 100644 index 0d50706ca..000000000 --- a/runtime/onert/core/src/ir/operation/LogicalAnd.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/LogicalAnd.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void LogicalAnd::accept(OperationVisitor &v) const { v.visit(*this); } - -LogicalAnd::LogicalAnd(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/LogicalNot.cc b/runtime/onert/core/src/ir/operation/LogicalNot.cc deleted file mode 100644 index 8f1142102..000000000 --- a/runtime/onert/core/src/ir/operation/LogicalNot.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/LogicalNot.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void LogicalNot::accept(OperationVisitor &v) const { v.visit(*this); } - -LogicalNot::LogicalNot(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/LogicalOr.cc b/runtime/onert/core/src/ir/operation/LogicalOr.cc deleted file mode 100644 index d75207c4a..000000000 --- a/runtime/onert/core/src/ir/operation/LogicalOr.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
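The two-input logical ops being deleted here (LogicalAnd, LogicalOr) are re-expressed through the generic ElementwiseBinary op that the loadElementwiseBinary template further down in base_loader.h instantiates, while LogicalNot maps to ElementwiseUnary::Type::LOGICAL_NOT. A hedged sketch; the exact ElementwiseBinaryType enumerator name is assumed, since only the enum's type name appears in this diff:

  ir::operation::ElementwiseBinary::Param param;
  param.op_type = ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND; // enumerator name assumed
  std::unique_ptr<ir::Operation> new_op(new ir::operation::ElementwiseBinary(inputs, outputs, param));
  subg.addOperation(std::move(new_op));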
- */ - -#include "ir/operation/LogicalOr.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void LogicalOr::accept(OperationVisitor &v) const { v.visit(*this); } - -LogicalOr::LogicalOr(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Logistic.cc b/runtime/onert/core/src/ir/operation/Logistic.cc deleted file mode 100644 index 77d9d17de..000000000 --- a/runtime/onert/core/src/ir/operation/Logistic.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Logistic.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Logistic::accept(OperationVisitor &v) const { v.visit(*this); } - -Logistic::Logistic(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Max.cc b/runtime/onert/core/src/ir/operation/Max.cc deleted file mode 100644 index 281f9d451..000000000 --- a/runtime/onert/core/src/ir/operation/Max.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Max.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Max::accept(OperationVisitor &v) const { v.visit(*this); } - -Max::Max(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/MaxPool2D.cc b/runtime/onert/core/src/ir/operation/MaxPool2D.cc deleted file mode 100644 index eac53cc5e..000000000 --- a/runtime/onert/core/src/ir/operation/MaxPool2D.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/MaxPool2D.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void MaxPool2D::accept(OperationVisitor &v) const { v.visit(*this); } - -MaxPool2D::MaxPool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param) - : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Min.cc b/runtime/onert/core/src/ir/operation/Min.cc deleted file mode 100644 index 8be7f0cc8..000000000 --- a/runtime/onert/core/src/ir/operation/Min.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Min.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Min::accept(OperationVisitor &v) const { v.visit(*this); } - -Min::Min(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Mul.cc b/runtime/onert/core/src/ir/operation/Mul.cc deleted file mode 100644 index 03cdf1b61..000000000 --- a/runtime/onert/core/src/ir/operation/Mul.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#include "ir/operation/Mul.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Mul::accept(OperationVisitor &v) const { v.visit(*this); } - -Mul::Mul(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Neg.cc b/runtime/onert/core/src/ir/operation/Neg.cc deleted file mode 100644 index df623a13b..000000000 --- a/runtime/onert/core/src/ir/operation/Neg.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Neg.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Neg::accept(OperationVisitor &v) const { v.visit(*this); } - -Neg::Neg(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Pad.cc b/runtime/onert/core/src/ir/operation/Pad.cc index aecc2d994..0c56e92e3 100644 --- a/runtime/onert/core/src/ir/operation/Pad.cc +++ b/runtime/onert/core/src/ir/operation/Pad.cc @@ -27,8 +27,10 @@ namespace operation void Pad::accept(OperationVisitor &v) const { v.visit(*this); } +// PAD: 2 inputs +// PADV2: 3 inputs Pad::Pad(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(2u), inputs, outputs} + : Operation{OperandConstraint::createInRange(2u, 3u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/Sin.cc b/runtime/onert/core/src/ir/operation/Pool2D.cc index 631505f36..761d14c3d 100644 --- a/runtime/onert/core/src/ir/operation/Sin.cc +++ b/runtime/onert/core/src/ir/operation/Pool2D.cc @@ -14,9 +14,10 @@ * limitations under the License.
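The Pad change just above relaxes the operand count from exactly two to a 2-3 range, so one IR node now covers both TFLite PAD (input, paddings) and PADV2 (input, paddings, constant pad value). Roughly, with hypothetical operand indices:

  // Two inputs model PAD; adding the pad-value operand models PADV2.
  onert::ir::operation::Pad pad_v2{onert::ir::OperandIndexSequence{input, paddings, pad_value},
                                   onert::ir::OperandIndexSequence{output}};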
*/ -#include "ir/operation/Sin.h" +#include "ir/operation/Pool2D.h" #include <cassert> +#include <unordered_map> #include "ir/OperationVisitor.h" @@ -27,13 +28,24 @@ namespace ir namespace operation { -void Sin::accept(OperationVisitor &v) const { v.visit(*this); } +void Pool2D::accept(OperationVisitor &v) const { v.visit(*this); } -Sin::Sin(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} +Pool2D::Pool2D(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } +std::string Pool2D::name() const +{ + using PoolType = onert::ir::operation::Pool2D::PoolType; + static const std::unordered_map<PoolType, std::string> name_map{ + {PoolType::AVG, "Avg" + std::string{toString(opcode())}}, + {PoolType::L2, "L2" + std::string{toString(opcode())}}, + {PoolType::MAX, "Max" + std::string{toString(opcode())}}}; + return name_map.at(_param.op_type); +} + } // namespace operation } // namespace ir } // namespace onert diff --git a/runtime/onert/core/src/ir/operation/RSQRT.cc b/runtime/onert/core/src/ir/operation/RSQRT.cc deleted file mode 100644 index 2bce1fa28..000000000 --- a/runtime/onert/core/src/ir/operation/RSQRT.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/RSQRT.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void RSQRT::accept(OperationVisitor &v) const { v.visit(*this); } - -RSQRT::RSQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Cos.cc b/runtime/onert/core/src/ir/operation/Rank.cc index 831a92dbd..c357e9018 100644 --- a/runtime/onert/core/src/ir/operation/Cos.cc +++ b/runtime/onert/core/src/ir/operation/Rank.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ir/operation/Cos.h" +#include "ir/operation/Rank.h" #include <cassert> @@ -27,9 +27,9 @@ namespace ir namespace operation { -void Cos::accept(OperationVisitor &v) const { v.visit(*this); } +void Rank::accept(OperationVisitor &v) const { v.visit(*this); } -Cos::Cos(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) +Rank::Rank(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) : Operation{OperandConstraint::createExact(1u), inputs, outputs} { } diff --git a/runtime/onert/core/src/ir/operation/ReLU.cc b/runtime/onert/core/src/ir/operation/ReLU.cc deleted file mode 100644 index f0c88478b..000000000 --- a/runtime/onert/core/src/ir/operation/ReLU.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd.
All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/ReLU.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void ReLU::accept(OperationVisitor &v) const { v.visit(*this); } - -ReLU::ReLU(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/ReLU1.cc b/runtime/onert/core/src/ir/operation/ReLU1.cc deleted file mode 100644 index 734f0b65b..000000000 --- a/runtime/onert/core/src/ir/operation/ReLU1.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/ReLU1.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void ReLU1::accept(OperationVisitor &v) const { v.visit(*this); } - -ReLU1::ReLU1(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/ReLU6.cc b/runtime/onert/core/src/ir/operation/ReLU6.cc deleted file mode 100644 index 5972329af..000000000 --- a/runtime/onert/core/src/ir/operation/ReLU6.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
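ReLU and ReLU1 above, and ReLU6 below, lose their dedicated classes; they funnel into the parameterized ElementwiseActivation op whose loader, loadElementwiseActivation, is declared later in base_loader.h with alpha/beta arguments. One plausible encoding for ReLU6, assuming alpha/beta act as upper/lower clamp bounds (the enumerator name and the clamp convention are assumptions, not shown in this diff):

  ir::operation::ElementwiseActivation::Param param;
  param.op_type = ir::operation::ElementwiseActivation::Type::RELU; // enumerator name assumed
  param.alpha = 6.0f; // assumed upper bound, giving ReLU6 semantics
  param.beta = 0.0f;  // assumed lower bound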
- */ - -#include "ir/operation/ReLU6.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void ReLU6::accept(OperationVisitor &v) const { v.visit(*this); } - -ReLU6::ReLU6(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Round.cc b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc index 16dfb2b1b..9f17af97c 100644 --- a/runtime/onert/core/src/ir/operation/Round.cc +++ b/runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "ir/operation/Round.h" +#include "ir/operation/ResizeNearestNeighbor.h" #include <cassert> @@ -27,10 +27,12 @@ namespace ir namespace operation { -void Round::accept(OperationVisitor &v) const { v.visit(*this); } +void ResizeNearestNeighbor::accept(OperationVisitor &v) const { v.visit(*this); } -Round::Round(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} +ResizeNearestNeighbor::ResizeNearestNeighbor(const OperandIndexSequence &inputs, + const OperandIndexSequence &outputs, + const Param &param) + : Operation{OperandConstraint::createExact(1u), inputs, outputs}, _param{param} { } diff --git a/runtime/onert/core/src/ir/operation/SQRT.cc b/runtime/onert/core/src/ir/operation/SQRT.cc deleted file mode 100644 index ad887d89a..000000000 --- a/runtime/onert/core/src/ir/operation/SQRT.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/SQRT.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void SQRT::accept(OperationVisitor &v) const { v.visit(*this); } - -SQRT::SQRT(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Sub.cc b/runtime/onert/core/src/ir/operation/Sub.cc deleted file mode 100644 index d71071686..000000000 --- a/runtime/onert/core/src/ir/operation/Sub.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Sub.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Sub::accept(OperationVisitor &v) const { v.visit(*this); } - -Sub::Sub(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, - const Param &param) - : Operation{OperandConstraint::createExact(2u), inputs, outputs}, _param{param} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/Tanh.cc b/runtime/onert/core/src/ir/operation/Tanh.cc deleted file mode 100644 index 8fab0c0f3..000000000 --- a/runtime/onert/core/src/ir/operation/Tanh.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ir/operation/Tanh.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void Tanh::accept(OperationVisitor &v) const { v.visit(*this); } - -Tanh::Tanh(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/operation/ZerosLike.cc b/runtime/onert/core/src/ir/operation/ZerosLike.cc deleted file mode 100644 index 5f49b98d1..000000000 --- a/runtime/onert/core/src/ir/operation/ZerosLike.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
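Sub above joins Add, Mul and Div in folding into a single BinaryArithmetic op that carries an ArithmeticType plus the fused activation, as the loadBinaryArithmetic template later in this commit shows. A former Sub with no fused activation would now be built roughly as:

  ir::operation::BinaryArithmetic::Param param;
  param.arithmetic_type = ir::operation::BinaryArithmetic::ArithmeticType::SUB;
  param.activation = ir::Activation::NONE;
  std::unique_ptr<ir::Operation> new_op(new ir::operation::BinaryArithmetic(inputs, outputs, param));
  subg.addOperation(std::move(new_op));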
- */ - -#include "ir/operation/ZerosLike.h" - -#include <cassert> - -#include "ir/OperationVisitor.h" - -namespace onert -{ -namespace ir -{ -namespace operation -{ - -void ZerosLike::accept(OperationVisitor &v) const { v.visit(*this); } - -ZerosLike::ZerosLike(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs) - : Operation{OperandConstraint::createExact(1u), inputs, outputs} -{ -} - -} // namespace operation -} // namespace ir -} // namespace onert diff --git a/runtime/onert/core/src/ir/pass/PermutationOperationPass.h b/runtime/onert/core/src/ir/pass/PermutationOperationPass.h deleted file mode 100644 index 6dec9ea8f..000000000 --- a/runtime/onert/core/src/ir/pass/PermutationOperationPass.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__ -#define __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__ - -#include "ir/OperationVisitor.h" -#include "LoweredOperationPass.h" - -namespace onert -{ -namespace ir -{ -namespace pass -{ - -class PermutationOperationPass : public LoweredOperationPass, public OperationVisitor -{ -public: - using LoweredOperationPass::LoweredOperationPass; - -public: - std::string id() final { return "PermutationOperationPass"; } - -public: - void callback(const OperationIndex &i, Operation &n) final; - -public: - void visit(const operation::Add &) final; - void visit(const operation::Comparison &) final; - void visit(const operation::Concat &) final; - void visit(const operation::Div &) final; - void visit(const operation::LogicalAnd &) final; - void visit(const operation::LogicalNot &) final; - void visit(const operation::LogicalOr &) final; - void visit(const operation::Max &) final; - void visit(const operation::Min &) final; - void visit(const operation::Mul &) final; - void visit(const operation::Pack &) final; - void visit(const operation::PReLU &) final; - void visit(const operation::SquaredDifference &) final; - void visit(const operation::Sub &) final; - void visit(const operation::Unpack &) final; - void visit(const operation::FullyConnected &) final; - void visit(const operation::Gather &) final; - void visit(const operation::Reshape &) final; - -private: - void applyExpandRanks(const Operation &); - void changeToKeepLayout(const Operation &); -}; - -} // namespace pass -} // namespace ir -} // namespace onert - -#endif // __ONERT_GRAPH_PASS_PERMUTATION_OPERATION_PASS_H__ diff --git a/runtime/onert/core/src/util/EventRecorder.cc b/runtime/onert/core/src/util/EventRecorder.cc index ec7f92117..13a599bed 100644 --- a/runtime/onert/core/src/util/EventRecorder.cc +++ b/runtime/onert/core/src/util/EventRecorder.cc @@ -21,7 +21,12 @@ #include <unordered_map> #include <json/json.h> #include <assert.h> +#include <utility> +#include <map> +#include <set> +#include <stdint.h> +// json type for Chrome Event Trace namespace { @@ -110,6 +115,290 @@ std::string object(const 
CounterEvent &evt) } // namespace +// md table type +namespace +{ + +void writeMDTableRow(std::ostream &os, const std::vector<std::string> &list) +{ + os << "| "; + for (auto &key : list) + { + os << key << " | "; + } + os << "\n"; +} + +struct MDContent +{ + std::string name; + uint64_t begin_ts; + uint64_t end_ts; + uint32_t min_rss; + uint32_t max_rss; + uint32_t min_page_reclaims; + uint32_t max_page_reclaims; + + MDContent() + : begin_ts(0), end_ts(0), min_rss(UINT32_MAX), max_rss(0), min_page_reclaims(UINT32_MAX), + max_page_reclaims(0) + { + // DO NOTHING + } + + virtual ~MDContent() = default; + + void updateRss(uint32_t rss) + { + if (min_rss == UINT32_MAX) + min_rss = rss; + if (max_rss == 0) + max_rss = rss; + + if (min_rss > rss) + min_rss = rss; + else if (max_rss < rss) + max_rss = rss; + } + + void updateMinflt(uint32_t minflt) + { + if (min_page_reclaims == UINT32_MAX) + min_page_reclaims = minflt; + if (max_page_reclaims == 0) + max_page_reclaims = minflt; + + if (min_page_reclaims > minflt) + min_page_reclaims = minflt; + else if (max_page_reclaims < minflt) + max_page_reclaims = minflt; + } + + virtual void write(std::ostream &os) const = 0; +}; + +struct OpSeq : public MDContent +{ + std::string backend; + uint64_t graph_latency; + + struct OpSeqCmp + { + bool operator()(const OpSeq &lhs, const OpSeq &rhs) const + { + return lhs.begin_ts < rhs.begin_ts; + } + bool operator()(const OpSeq &lhs, const OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; } + bool operator()(OpSeq &lhs, OpSeq &rhs) { return lhs.begin_ts < rhs.begin_ts; } + }; + + void write(std::ostream &os) const override + { + uint64_t opseq_latency = end_ts - begin_ts; + double opseq_per = static_cast<double>(opseq_latency) / graph_latency * 100.0; + writeMDTableRow(os, {name, backend, std::to_string(opseq_latency), std::to_string(opseq_per), + std::to_string(min_rss), std::to_string(max_rss), + std::to_string(min_page_reclaims), std::to_string(max_page_reclaims)}); + } +}; + +struct Graph : public MDContent +{ + std::set<OpSeq, OpSeq::OpSeqCmp> opseqs; + + void setOpSeqs(const std::map<std::string, OpSeq> &name_to_opseq) + { + uint64_t graph_latency = end_ts - begin_ts; + for (auto it : name_to_opseq) + { + auto opseq = it.second; + opseq.graph_latency = graph_latency; + + opseqs.insert(opseq); + + updateRss(opseq.min_rss); + updateRss(opseq.max_rss); + updateMinflt(opseq.min_page_reclaims); + updateMinflt(opseq.max_page_reclaims); + } + } + + void write(std::ostream &os) const override + { + static std::vector<std::string> graph_headers{"latency(us)", "rss_min(kb)", "rss_max(kb)", + "page_reclaims_min", "page_reclaims_max"}; + + static std::vector<std::string> graph_headers_line{"-----------", "-------", "-------", + "-----------------", "-----------------"}; + + // Graph's Header + writeMDTableRow(os, graph_headers); + writeMDTableRow(os, graph_headers_line); + + // Graph's contents + writeMDTableRow(os, {std::to_string(end_ts - begin_ts), std::to_string(min_rss), + std::to_string(max_rss), std::to_string(min_page_reclaims), + std::to_string(max_page_reclaims)}); + + os << "\n"; + + static std::vector<std::string> opseq_headers{ + "OpSeq name", "backend", "latency(us)", "latency(%)", + "rss_min(kb)", "rss_max(kb)", "page_reclaims_min", "page_reclaims_max"}; + + static std::vector<std::string> opseq_headers_line{ + "----------", "-------", "-----------", "-----------", + "-------", "-------", "-----------------", "-----------------"}; + + os << "## OpSequences \n"; + + // OpSeq's Header + 
writeMDTableRow(os, opseq_headers); + writeMDTableRow(os, opseq_headers_line); + + // OpSeq's contents + for (auto opseq : opseqs) + { + opseq.write(os); + } + + os << "\n"; + } +}; + +struct MDTableBuilder +{ + MDTableBuilder(const std::vector<DurationEvent> &duration_events, + const std::vector<CounterEvent> &counter_events) + : _duration_events(duration_events), _counter_events(counter_events) + { + for (const auto &evt : _counter_events) + { + uint64_t ts = std::stoull(evt.ts); + auto &name = evt.name; + assert(name.compare("maxrss") == 0 || name.compare("minflt") == 0); + assert(evt.values.size() == 1); + auto &val = evt.values.begin()->second; + if (_ts_to_values.find(ts) == _ts_to_values.end()) + { + std::pair<uint32_t, uint32_t> values; + if (name.compare("maxrss") == 0) + values.first = std::stoul(val); + else + values.second = std::stoul(val); + _ts_to_values.insert({ts, values}); + } + else + { + auto &values = _ts_to_values.at(ts); + if (name.compare("maxrss") == 0) + values.first = std::stoul(val); + else + values.second = std::stoul(val); + } + } + } + + MDTableBuilder &build() + { + for (auto &it : divideGraph()) + { + size_t begin_idx = it.first; + size_t end_idx = it.second; + std::map<std::string, OpSeq> name_to_opseq; + for (size_t i = begin_idx + 1; i < end_idx; ++i) + { + const auto &evt = _duration_events[i]; + assert(evt.name.compare("Graph") != 0); + assert(evt.ph.compare("B") == 0 || evt.ph.compare("E") == 0); + if (evt.ph.compare("B") == 0) + { + assert(name_to_opseq.find(evt.name) == name_to_opseq.end()); + name_to_opseq.insert({evt.name, makeOpSeq(evt)}); + } + else + { + assert(name_to_opseq.find(evt.name) != name_to_opseq.end()); + auto &opseq = name_to_opseq.at(evt.name); + updateOpSeq(opseq, evt); + } + } + + _graphs.emplace_back(makeGraph(begin_idx, end_idx, name_to_opseq)); + } + + return *this; + } + + std::vector<std::pair<size_t, size_t>> divideGraph() + { + std::vector<std::pair<size_t, size_t>> graph_idx_list; // pair<begin_idx, end_idx> + for (size_t i = 0, begin_idx = 0; i < _duration_events.size(); ++i) + { + const auto &evt = _duration_events.at(i); + if (evt.name.compare("Graph") == 0) + { + if (evt.ph.compare("B") == 0) + begin_idx = i; + else + graph_idx_list.emplace_back(begin_idx, i); + } + } + return graph_idx_list; + } + + OpSeq makeOpSeq(const DurationEvent &evt) + { + OpSeq opseq; + opseq.name = evt.name; + opseq.begin_ts = std::stoull(evt.ts); + opseq.updateRss(_ts_to_values.at(opseq.begin_ts).first); + opseq.updateMinflt(_ts_to_values.at(opseq.begin_ts).second); + opseq.backend = evt.tid; + return opseq; + } + + void updateOpSeq(OpSeq &opseq, const DurationEvent &evt) + { + opseq.end_ts = std::stoull(evt.ts); + opseq.updateRss(_ts_to_values.at(opseq.end_ts).first); + opseq.updateMinflt(_ts_to_values.at(opseq.end_ts).second); + } + + Graph makeGraph(size_t begin_idx, size_t end_idx, + const std::map<std::string, OpSeq> &name_to_opseq) + { + Graph graph; + graph.name = "Graph"; + graph.begin_ts = std::stoull(_duration_events[begin_idx].ts); + graph.updateRss(_ts_to_values.at(graph.begin_ts).first); + graph.updateMinflt(_ts_to_values.at(graph.begin_ts).second); + graph.end_ts = std::stoull(_duration_events[end_idx].ts); + graph.updateRss(_ts_to_values.at(graph.end_ts).first); + graph.updateMinflt(_ts_to_values.at(graph.end_ts).second); + graph.setOpSeqs(name_to_opseq); + return graph; + } + + void write(std::ostream &os) + { + // Write contents + for (size_t i = 0; i < _graphs.size(); ++i) + { + os << "# Graph " << i << "\n"; + 
_graphs.at(i).write(os); + } + } + + const std::vector<DurationEvent> &_duration_events; + const std::vector<CounterEvent> &_counter_events; + // timestamp to std::pair<maxrss, minflt> + std::unordered_map<uint64_t, std::pair<uint32_t, uint32_t>> _ts_to_values; + std::vector<Graph> _graphs; +}; + +} // namespace + void EventRecorder::emit(const DurationEvent &evt) { std::lock_guard<std::mutex> lock{_mu}; @@ -136,6 +425,9 @@ void EventRecorder::writeToFile(std::ostream &os) case WriteFormat::SNPE_BENCHMARK: writeSNPEBenchmark(os); break; + case WriteFormat::MD_TABLE: + writeMDTable(os); + break; default: assert(!"Invalid value"); break; @@ -258,3 +550,8 @@ void EventRecorder::writeChromeTrace(std::ostream &os) os << " ]\n"; os << "}\n"; } + +void EventRecorder::writeMDTable(std::ostream &os) +{ + MDTableBuilder(_duration_events, _counter_events).build().write(os); +} diff --git a/runtime/onert/core/src/util/EventRecorder.h b/runtime/onert/core/src/util/EventRecorder.h index 6eea06986..37ec1a0f1 100644 --- a/runtime/onert/core/src/util/EventRecorder.h +++ b/runtime/onert/core/src/util/EventRecorder.h @@ -53,7 +53,8 @@ public: enum class WriteFormat { CHROME_TRACING, - SNPE_BENCHMARK + SNPE_BENCHMARK, + MD_TABLE, }; public: @@ -71,6 +72,7 @@ public: private: void writeSNPEBenchmark(std::ostream &os); void writeChromeTrace(std::ostream &os); + void writeMDTable(std::ostream &os); private: std::mutex _mu; diff --git a/runtime/onert/core/src/util/ShapeInference.cc b/runtime/onert/core/src/util/ShapeInference.cc index 9a24f8c1a..95c15049d 100644 --- a/runtime/onert/core/src/util/ShapeInference.cc +++ b/runtime/onert/core/src/util/ShapeInference.cc @@ -18,8 +18,6 @@ #include "util/Utils.h" #include "ir/InternalType.h" #include "ir/Shape.h" -#include "ir/operation/AvgPool2D.h" -#include "ir/operation/MaxPool2D.h" #include "util/ShapeInference.h" #include "util/logging.h" @@ -81,10 +79,12 @@ ir::Shape broadcastShapes(const ir::Shape &lhs_shape, const ir::Shape &rhs_shape // Calculate output height and width of convolution-like operation std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, const int ker_h, const int ker_w, const ir::Padding pad, - const ir::Stride stride) + const ir::Stride stride, + const ir::Dilation dilation = {1, 1}) { int32_t out_h = 0, out_w = 0; - + int32_t effective_filter_w_size = (ker_w - 1) * dilation.width_factor + 1; + int32_t effective_filter_h_size = (ker_h - 1) * dilation.height_factor + 1; switch (pad.type) { case ir::PaddingType::SAME: @@ -92,12 +92,15 @@ std::pair<int, int> calcConvLikeHeightAndWidth(const int in_h, const int in_w, c out_w = ceil_div(in_w, stride.horizontal); break; case ir::PaddingType::VALID: - out_h = ceil_div(in_h - ker_h + 1, stride.vertical); - out_w = ceil_div(in_w - ker_w + 1, stride.horizontal); + out_h = ceil_div(in_h - effective_filter_h_size + 1, stride.vertical); + out_w = ceil_div(in_w - effective_filter_w_size + 1, stride.horizontal); break; case ir::PaddingType::EXPLICIT: - out_h = (in_h + pad.param.top + pad.param.bottom - ker_h) / stride.vertical + 1; - out_w = (in_w + pad.param.left + pad.param.right - ker_w) / stride.horizontal + 1; + out_h = + (in_h + pad.param.top + pad.param.bottom - effective_filter_h_size) / stride.vertical + 1; + out_w = + (in_w + pad.param.left + pad.param.right - effective_filter_w_size) / stride.horizontal + + 1; break; default: assert(false); @@ -126,17 +129,6 @@ ir::Shape inferArgMaxShape(const ir::Shape &input_shape, int axis, int rank) return out_shape; } -ir::Shape 
inferAvgPoolShape(const ir::Shape &in_shape, const ir::operation::AvgPool2D::Param &param, - const ir::Layout layout) -{ - assert(layout == ir::Layout::NHWC); - auto ifm_shape = in_shape.asFeature(layout); - const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw, - param.padding, param.stride); - // Pooling don't change number of channels and batch size - return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}; -} - ir::Shape inferReduceShape(const ir::Shape &input_shape, const std::vector<int> &axes, bool keep_dims) { @@ -320,7 +312,7 @@ ir::Shape inferConv2DShape(const ir::Shape &in_shape, const ir::Shape &ker_shape assert(ifm_shape.C == kf_shape.C); const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, kf_shape.H, kf_shape.W, - param.padding, param.stride); + param.padding, param.stride, param.dilation); return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, kf_shape.N}; } @@ -411,17 +403,6 @@ ir::Shape inferGatherShape(const ir::Shape &input_shape, const ir::Shape &indice return out_shape; } -ir::Shape inferMaxPoolShape(const ir::Shape &in_shape, const ir::operation::MaxPool2D::Param &param, - const ir::Layout layout) -{ - assert(layout == ir::Layout::NHWC); - auto ifm_shape = in_shape.asFeature(layout); - const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw, - param.padding, param.stride); - // Pooling don't change number of channels and batch size - return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}; -} - ir::Shape inferOnehotShape(const ir::Shape &input_shape, const int depth, int axis) { assert(depth >= 0); @@ -486,6 +467,17 @@ ir::Shape inferPadShape(const ir::Shape &in_shape, const int32_t *pad_buf, const return ret; } +ir::Shape inferPoolShape(const ir::Shape &in_shape, const ir::operation::Pool2D::Param &param, + const ir::Layout layout) +{ + assert(layout == ir::Layout::NHWC); + auto ifm_shape = in_shape.asFeature(layout); + const auto out_h_w = calcConvLikeHeightAndWidth(ifm_shape.H, ifm_shape.W, param.kh, param.kw, + param.padding, param.stride); + // Pooling doesn't change the number of channels or the batch size + return ir::Shape{ifm_shape.N, out_h_w.first, out_h_w.second, ifm_shape.C}; +} + ir::Shape inferResizeBilinearShape(const ir::Shape &in_shape, const int32_t output_height, const int32_t output_width) { diff --git a/runtime/onert/frontend/base_loader/include/base_loader.h b/runtime/onert/frontend/base_loader/include/base_loader.h index 0f6a2a5d0..480452e01 100644 --- a/runtime/onert/frontend/base_loader/include/base_loader.h +++ b/runtime/onert/frontend/base_loader/include/base_loader.h @@ -105,40 +105,39 @@ protected: template <typename Param, typename OptionsType> void loadStridesAndPaddings(Param &param, const OptionsType *options); // Load Pool2D param - template <typename Param> void loadPool2D(Param &param, const Pool2DOptions *options); + template <typename Param> void loadPool2DOptions(Param &param, const Pool2DOptions *options); // Operations void loadConv2D(const Operator *op, ir::Graph &subg); void loadDepthwiseConv2D(const Operator *op, ir::Graph &subg); void loadTransposeConv(const Operator *op, ir::Graph &subg); - void loadAvgPool2D(const Operator *op, ir::Graph &subg); + void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type); void loadReshape(const Operator *op, ir::Graph &subg); void loadSoftmax(const Operator *op, ir::Graph &subg); - void loadMaxPool2D(const Operator *op, ir::Graph &subg); void
loadConcatenation(const Operator *op, ir::Graph &subg); void loadFill(const Operator *op, ir::Graph &subg); void loadFC(const Operator *op, ir::Graph &subg); - void loadAdd(const Operator *op, ir::Graph &subg); - void loadSub(const Operator *op, ir::Graph &subg); - void loadMul(const Operator *op, ir::Graph &subg); - void loadDiv(const Operator *op, ir::Graph &subg); + template <ir::operation::BinaryArithmetic::ArithmeticType op_type> + void loadBinaryArithmetic(const Operator *op, ir::Graph &subg); + void loadAddV2(const Operator *op, ir::Graph &subg); void loadPack(const Operator *op, ir::Graph &subg); - void loadRelu(const Operator *op, ir::Graph &subg); - void loadRelu6(const Operator *op, ir::Graph &subg); void loadResizeBilinear(const Operator *op, ir::Graph &subg); - void loadRsqrt(const Operator *op, ir::Graph &subg); + void loadResizeNearestNeighbor(const Operator *op, ir::Graph &subg); void loadSelect(const Operator *op, ir::Graph &subg); - void loadSqrt(const Operator *op, ir::Graph &subg); void loadSquaredDifference(const Operator *op, ir::Graph &subg); - void loadTanh(const Operator *op, ir::Graph &subg); void loadTranspose(const Operator *op, ir::Graph &subg); - void loadReduce(const Operator *op, ir::Graph &subg, - ir::operation::Reduce::ReduceType reduce_type); + template <ir::operation::Reduce::ReduceType reduce_type> + void loadReduce(const Operator *op, ir::Graph &subg); void loadReduceAll(const Operator *op, ir::Graph &subg); void loadReverseV2(const Operator *op, ir::Graph &subg); void loadPad(const Operator *op, ir::Graph &subg); - void loadLogistic(const Operator *op, ir::Graph &subg); - void loadExp(const Operator *op, ir::Graph &subg); + void loadElementwiseActivation(const Operator *op, ir::Graph &subg, + ir::operation::ElementwiseActivation::Type op_type, + float alpha = 0.f, float beta = 0.f); + template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type> + void loadElementwiseBinary(const Operator *op, ir::Graph &subg); + void loadElementwiseUnary(const Operator *op, ir::Graph &subg, + ir::operation::ElementwiseUnary::Type op_type); void loadExpandDims(const Operator *op, ir::Graph &subg); void loadGather(const Operator *op, ir::Graph &subg); void loadCustom(const Operator *op, ir::Graph &subg); @@ -152,35 +151,25 @@ protected: void loadSlice(const Operator *op, ir::Graph &subg); void loadStridedSlice(const Operator *op, ir::Graph &subg); void loadUnpack(const Operator *op, ir::Graph &subg); - void loadMinimum(const Operator *op, ir::Graph &subg); - void loadMaximum(const Operator *op, ir::Graph &subg); - void loadCast(const Operator *op, ir::Graph &subg); void loadComparison(const Operator *op, ir::Graph &subg); void loadEinsum(const Operator *op, ir::Graph &subg); void loadOneHot(const Operator *op, ir::Graph &subg); - void loadAbs(const Operator *op, ir::Graph &subg); - void loadCos(const Operator *op, ir::Graph &subg); - void loadSin(const Operator *op, ir::Graph &subg); void loadShape(const Operator *op, ir::Graph &subg); void loadIf(const Operator *op, ir::Graph &subg); void loadWhile(const Operator *op, ir::Graph &subg); - void loadNeg(const Operator *op, ir::Graph &subg); - void loadLog(const Operator *op, ir::Graph &subg); void loadArgMax(const Operator *op, ir::Graph &subg); - void loadRound(const Operator *op, ir::Graph &subg); void loadPow(const Operator *op, ir::Graph &subg); - void loadLogicalNot(const Operator *op, ir::Graph &subg); - void loadZerosLike(const Operator *op, ir::Graph &subg); void loadTile(const Operator *op, 
ir::Graph &subg); - void loadLogicalOr(const Operator *op, ir::Graph &subg); void loadRange(const Operator *op, ir::Graph &subg); + void loadRank(const Operator *op, ir::Graph &subg); void loadMatrixBandPart(const Operator *op, ir::Graph &subg); void loadBroadcastTo(const Operator *op, ir::Graph &subg); void loadFusedBatchNorm(const Operator *op, ir::Graph &subg); void loadLogSoftmax(const Operator *op, ir::Graph &subg); - void loadQuantize(const Operator *op, ir::Graph &subg); void loadSpaceToDepth(const Operator *op, ir::Graph &subg); void loadStatelessRandomUniform(const Operator *op, ir::Graph &subg); + void loadL2Normalization(const Operator *op, ir::Graph &subg); + void loadLeakyRelu(const Operator *op, ir::Graph &subg); protected: // Base address for mapped region for loading (if needed) @@ -194,6 +183,7 @@ protected: const Model *_model; // Maps Tensor indices to onert Operands. std::vector<ir::OperandIndex> _tensor_to_operand; + std::unordered_map<ir::OperandIndex, std::string> _tensor_names; // Verifier std::unique_ptr<Verifier> _verifier; }; @@ -466,8 +456,8 @@ ir::OperandIndex BaseLoader<LoaderDomain, SpecificLoader>::loadOperand(const Ten subg.setOperandValue(operand_index, std::move(data_obj)); } - // Name unused - // auto name = tensor->name(); + _tensor_names.emplace(operand_index, tensor->name()->str()); + // Variable if (tensor->is_variable()) throw std::runtime_error("Variable tensor not supported!"); @@ -518,8 +508,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadStridesAndPaddings(Param &par template <typename LoaderDomain, typename SpecificLoader> template <typename Param> -void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(Param &param, - const Pool2DOptions *options) +void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2DOptions(Param &param, + const Pool2DOptions *options) { // Strides and Paddings loadStridesAndPaddings(param, options); @@ -543,7 +533,10 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadConv2D(const Operator *op, ir const auto *options = op->builtin_options_as_Conv2DOptions(); param.activation = convertActivation(options->fused_activation_function()); loadStridesAndPaddings(param, options); - // Dilation h/w factor unused + + param.dilation.width_factor = options->dilation_w_factor(); + param.dilation.height_factor = options->dilation_h_factor(); + std::unique_ptr<ir::Operation> new_op(new ir::operation::Conv2D(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } @@ -585,19 +578,21 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTransposeConv(const Operator } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadAvgPool2D(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadPool2D(const Operator *op, ir::Graph &subg, + ir::operation::Pool2D::PoolType op_type) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - ir::operation::AvgPool2D::Param param; + ir::operation::Pool2D::Param param; + param.op_type = op_type; const auto *options = op->builtin_options_as_Pool2DOptions(); - loadPool2D(param, options); + loadPool2DOptions(param, options); - std::unique_ptr<ir::Operation> new_op(new ir::operation::AvgPool2D(inputs, outputs, param)); + std::unique_ptr<ir::Operation> new_op(new ir::operation::Pool2D(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } @@ -645,23 +640,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadSoftmax(const Operator *op, i }
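loadConv2D above now forwards dilation_w_factor/dilation_h_factor instead of discarding them, and the calcConvLikeHeightAndWidth change earlier in ShapeInference.cc consumes them: a k-tap kernel with dilation d spans (k - 1) * d + 1 input elements. A quick worked check of the VALID branch with illustrative numbers:

  int in_h = 32, ker_h = 3, dilation_h = 2, stride_v = 1;
  int effective_h = (ker_h - 1) * dilation_h + 1;                  // 5
  int out_h = (in_h - effective_h + 1 + stride_v - 1) / stride_v;  // ceil_div -> 28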
template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadMaxPool2D(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - ir::operation::MaxPool2D::Param param; - const auto *options = op->builtin_options_as_Pool2DOptions(); - - loadPool2D(param, options); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::MaxPool2D(inputs, outputs, param)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadConcatenation(const Operator *op, ir::Graph &subg) { @@ -719,70 +697,82 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadFC(const Operator *op, ir::Gr } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadAdd(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - ir::operation::Add::Param param; - const auto *options = op->builtin_options_as_AddOptions(); - - param.activation = convertActivation(options->fused_activation_function()); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Add(inputs, outputs, param)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadSub(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - ir::operation::Sub::Param param; - const auto *options = op->builtin_options_as_SubOptions(); - - param.activation = convertActivation(options->fused_activation_function()); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Sub(inputs, outputs, param)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadMul(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadAddV2(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - ir::operation::Mul::Param param; - const auto *options = op->builtin_options_as_MulOptions(); + ir::operation::BinaryArithmetic::Param param; + param.arithmetic_type = ir::operation::BinaryArithmetic::ArithmeticType::ADD; - param.activation = convertActivation(options->fused_activation_function()); + if (op->custom_options() == nullptr) + { + param.activation = ir::Activation::NONE; + } + else + { + size_t custom_op_data_size = op->custom_options()->size(); + auto custom_op_data = op->custom_options()->Data(); + auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size); + auto attr_map = data_root.AsMap(); + const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>( + attr_map["fused_activation_function"].AsInt8()); + param.activation = convertActivation(fused_activation_func); + } - std::unique_ptr<ir::Operation> new_op(new ir::operation::Mul(inputs, outputs, param)); + std::unique_ptr<ir::Operation> new_op( + new ir::operation::BinaryArithmetic(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadDiv(const Operator *op, 
ir::Graph &subg) +template <ir::operation::BinaryArithmetic::ArithmeticType op_type> +void BaseLoader<LoaderDomain, SpecificLoader>::loadBinaryArithmetic(const Operator *op, + ir::Graph &subg) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - ir::operation::Div::Param param; - const auto *options = op->builtin_options_as_DivOptions(); - - param.activation = convertActivation(options->fused_activation_function()); + ir::operation::BinaryArithmetic::Param param; + param.arithmetic_type = op_type; + switch (op_type) + { + case ir::operation::BinaryArithmetic::ArithmeticType::ADD: + { + const auto *add_options = op->builtin_options_as_AddOptions(); + param.activation = convertActivation(add_options->fused_activation_function()); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::SUB: + { + const auto *sub_options = op->builtin_options_as_SubOptions(); + param.activation = convertActivation(sub_options->fused_activation_function()); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::MUL: + { + const auto *mul_options = op->builtin_options_as_MulOptions(); + param.activation = convertActivation(mul_options->fused_activation_function()); + break; + } + case ir::operation::BinaryArithmetic::ArithmeticType::DIV: + { + const auto *div_options = op->builtin_options_as_DivOptions(); + param.activation = convertActivation(div_options->fused_activation_function()); + break; + } + default: + assert(false && + "The function 'loadBinaryArithmetic' supports only BinaryArithmetic operations"); + break; + } - std::unique_ptr<ir::Operation> new_op(new ir::operation::Div(inputs, outputs, param)); + std::unique_ptr<ir::Operation> new_op( + new ir::operation::BinaryArithmetic(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } @@ -805,26 +795,22 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPack(const Operator *op, ir:: } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseActivation( + const Operator *op, ir::Graph &subg, ir::operation::ElementwiseActivation::Type op_type, + float alpha, float beta) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} + ir::operation::ElementwiseActivation::Param param; + param.op_type = op_type; + param.alpha = alpha; + param.beta = beta; -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadRelu6(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::ReLU6(inputs, outputs)); + std::unique_ptr<ir::Operation> new_op( + new ir::operation::ElementwiseActivation(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } @@ -856,38 +842,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeBilinear(const Operator } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadRsqrt(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadResizeNearestNeighbor(const Operator *op, + ir::Graph &subg) { ir::OperandIndexSequence 
inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); + auto input = inputs.at(0); + auto size = inputs.at(1); - std::unique_ptr<ir::Operation> new_op(new ir::operation::RSQRT(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} + if (!subg.operands().at(size).isConstant()) + throw std::runtime_error("ResizeNearestNeighbor: non-constant 'size' is not supported."); -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; + std::vector<std::int32_t> size_v = subg.operands().at(size).template asVector<std::int32_t>(); - loadOperationIO(op, inputs, outputs); + ir::operation::ResizeNearestNeighbor::Param param; + param.height_out = size_v[0]; + param.width_out = size_v[1]; + param.align_corners = op->builtin_options_as_ResizeNearestNeighborOptions()->align_corners(); - std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs)); + std::unique_ptr<ir::Operation> new_op( + new ir::operation::ResizeNearestNeighbor({input}, outputs, param)); subg.addOperation(std::move(new_op)); } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadSqrt(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadSelect(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - std::unique_ptr<ir::Operation> new_op(new ir::operation::SQRT(inputs, outputs)); + std::unique_ptr<ir::Operation> new_op(new ir::operation::Select(inputs, outputs)); subg.addOperation(std::move(new_op)); } @@ -905,18 +893,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadSquaredDifference(const Opera } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadTanh(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Tanh(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -937,8 +913,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTranspose(const Operator *op, } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce( - const Operator *op, ir::Graph &subg, ir::operation::Reduce::ReduceType reduce_type) +template <ir::operation::Reduce::ReduceType reduce_type> +void BaseLoader<LoaderDomain, SpecificLoader>::loadReduce(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; @@ -1005,26 +981,49 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPad(const Operator *op, ir::G } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadLogistic(const Operator *op, ir::Graph &subg) +template <ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type> +void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseBinary(const Operator *op, + ir::Graph &subg) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, 
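// Two notes on loadResizeNearestNeighbor above. The 'size' operand must be a constant
// because height_out and width_out are fixed fields of the operation's Param. And the
// '.template asVector<std::int32_t>()' spelling is required C++ here: inside this class
// template, operands() is a dependent expression, so 'template' tells the parser that
// asVector names a member template rather than beginning a '<' comparison.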
outputs); - std::unique_ptr<ir::Operation> new_op(new ir::operation::Logistic(inputs, outputs)); + ir::operation::ElementwiseBinary::Param param; + param.op_type = op_type; + + std::unique_ptr<ir::Operation> new_op( + new ir::operation::ElementwiseBinary(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadExp(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadElementwiseUnary( + const Operator *op, ir::Graph &subg, ir::operation::ElementwiseUnary::Type op_type) { ir::OperandIndexSequence inputs; ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - std::unique_ptr<ir::Operation> new_op(new ir::operation::Exp(inputs, outputs)); + ir::operation::ElementwiseUnary::Param param; + param.op_type = op_type; + + if (op_type == ir::operation::ElementwiseUnary::Type::CAST) + { + auto qasymm8ToUint8 = [](ir::Operand &operand) { + if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) + { + operand.type(ir::DataType::UINT8); + } + }; + qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::ElementwiseUnary::Input::INPUT))); + qasymm8ToUint8(subg.operands().at(outputs.at(0))); + } + + std::unique_ptr<ir::Operation> new_op( + new ir::operation::ElementwiseUnary(inputs, outputs, param)); subg.addOperation(std::move(new_op)); } @@ -1177,6 +1176,17 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadStatelessRandomUniform(const } template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadRank(const Operator *op, ir::Graph &subg) +{ + ir::OperandIndexSequence inputs; + ir::OperandIndexSequence outputs; + loadOperationIO(op, inputs, outputs); + + std::unique_ptr<ir::Operation> new_op(new ir::operation::Rank(inputs, outputs)); + subg.addOperation(std::move(new_op)); +} + +template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1197,7 +1207,8 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir Einsum, BroadcastTo, FusedBatchNorm, - StatelessRandomUniform + StatelessRandomUniform, + Erf }; // Mapping from custom op name string to BuiltinOP enum @@ -1210,6 +1221,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir {"FusedBatchNormV3", BuiltinOP::FusedBatchNorm}, {"BroadcastTo", BuiltinOP::BroadcastTo}, {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform}, + {"Erf", BuiltinOP::Erf}, }; try @@ -1219,7 +1231,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir switch (custom_op_id) { case BuiltinOP::AddV2: - loadAdd(op, subg); + loadAddV2(op, subg); break; case BuiltinOP::ReduceAll: loadReduceAll(op, subg); @@ -1242,6 +1254,9 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadCustom(const Operator *op, ir case BuiltinOP::StatelessRandomUniform: loadStatelessRandomUniform(op, subg); break; + case BuiltinOP::Erf: + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF); + break; default: throw std::runtime_error{ "Loader: Custom OP map is defined but operation loader function is not defined"}; @@ -1396,51 +1411,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadUnpack(const Operator *op, ir } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, 
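// The CAST special case in loadElementwiseUnary above mirrors the NNAPI frontend change
// later in this patch: QUANT_UINT8_ASYMM and UINT8 share a storage format, and retyping
// both endpoints to UINT8 lets the cast kernel treat the data as plain integers rather
// than applying the quantization scale and zero point.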
SpecificLoader>::loadMinimum(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Min(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadMaximum(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Max(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadCast(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - auto qasymm8ToUint8 = [](ir::Operand &operand) { - if (operand.typeInfo().type() == ir::DataType::QUANT_UINT8_ASYMM) - { - operand.type(ir::DataType::UINT8); - } - }; - qasymm8ToUint8(subg.operands().at(inputs.at(ir::operation::Cast::Input::INPUT))); - qasymm8ToUint8(subg.operands().at(outputs.at(0))); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Cast(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadComparison(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1562,42 +1532,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOneHot(const Operator *op, ir } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadAbs(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Abs(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadCos(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Cos(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadSin(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Sin(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadShape(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1652,18 +1586,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadWhile(const Operator *op, ir: } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadNeg(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Neg(inputs, outputs)); - 
subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1697,30 +1619,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadArgMax(const Operator *op, ir } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadLog(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Log(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadRound(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::Round(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1733,31 +1631,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadPow(const Operator *op, ir::G } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalNot(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalNot(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadZerosLike(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::ZerosLike(inputs, outputs)); - - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadRange(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1787,18 +1660,6 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadTile(const Operator *op, ir:: } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadLogicalOr(const Operator *op, ir::Graph &subg) -{ - ir::OperandIndexSequence inputs; - ir::OperandIndexSequence outputs; - - loadOperationIO(op, inputs, outputs); - - std::unique_ptr<ir::Operation> new_op(new ir::operation::LogicalOr(inputs, outputs)); - subg.addOperation(std::move(new_op)); -} - -template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op, ir::Graph &subg) { ir::OperandIndexSequence inputs; @@ -1817,18 +1678,27 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadLogSoftmax(const Operator *op } template <typename LoaderDomain, typename SpecificLoader> -void BaseLoader<LoaderDomain, SpecificLoader>::loadQuantize(const Operator *op, ir::Graph &subg) +void BaseLoader<LoaderDomain, SpecificLoader>::loadL2Normalization(const Operator *op, + ir::Graph &subg) { ir::OperandIndexSequence inputs; 
ir::OperandIndexSequence outputs; loadOperationIO(op, inputs, outputs); - std::unique_ptr<ir::Operation> new_op(new ir::operation::Quantize(inputs, outputs)); + std::unique_ptr<ir::Operation> new_op(new ir::operation::L2Normalization(inputs, outputs)); subg.addOperation(std::move(new_op)); } template <typename LoaderDomain, typename SpecificLoader> +void BaseLoader<LoaderDomain, SpecificLoader>::loadLeakyRelu(const Operator *op, ir::Graph &subg) +{ + float alpha = op->builtin_options_as_LeakyReluOptions()->alpha(); + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LEAKY_RELU, alpha, + 1.f); +} + +template <typename LoaderDomain, typename SpecificLoader> void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, ir::Graph &subg) { const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code(); @@ -1839,7 +1709,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadConv2D(op, subg); return; case BuiltinOperator::BuiltinOperator_AVERAGE_POOL_2D: - loadAvgPool2D(op, subg); + loadPool2D(op, subg, ir::operation::Pool2D::PoolType::AVG); return; case BuiltinOperator::BuiltinOperator_DEPTHWISE_CONV_2D: loadDepthwiseConv2D(op, subg); @@ -1854,7 +1724,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadSoftmax(op, subg); return; case BuiltinOperator::BuiltinOperator_MAX_POOL_2D: - loadMaxPool2D(op, subg); + loadPool2D(op, subg, ir::operation::Pool2D::PoolType::MAX); return; case BuiltinOperator::BuiltinOperator_CONCATENATION: loadConcatenation(op, subg); @@ -1863,31 +1733,40 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadFC(op, subg); return; case BuiltinOperator::BuiltinOperator_ADD: - loadAdd(op, subg); + loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::ADD>(op, subg); return; case BuiltinOperator::BuiltinOperator_SUB: - loadSub(op, subg); + loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::SUB>(op, subg); return; case BuiltinOperator::BuiltinOperator_MUL: - loadMul(op, subg); + loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::MUL>(op, subg); return; case BuiltinOperator::BuiltinOperator_DIV: - loadDiv(op, subg); + loadBinaryArithmetic<ir::operation::BinaryArithmetic::ArithmeticType::DIV>(op, subg); return; case BuiltinOperator::BuiltinOperator_PACK: loadPack(op, subg); return; case BuiltinOperator::BuiltinOperator_RELU: - loadRelu(op, subg); + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, + ir::operation::ElementwiseActivation::infinity, 0.f); + return; + case BuiltinOperator::BuiltinOperator_RELU_N1_TO_1: + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 1.f, + -1.f); return; case BuiltinOperator::BuiltinOperator_RELU6: - loadRelu6(op, subg); + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::RELU, 6.f, + 0.f); return; case BuiltinOperator::BuiltinOperator_RESIZE_BILINEAR: loadResizeBilinear(op, subg); return; + case BuiltinOperator::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR: + loadResizeNearestNeighbor(op, subg); + return; case BuiltinOperator::BuiltinOperator_RSQRT: - loadRsqrt(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::RSQRT); return; case BuiltinOperator::BuiltinOperator_SELECT: loadSelect(op, subg); @@ -1897,37 +1776,39 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const 
Operator *op, loadSelect(op, subg); return; case BuiltinOperator::BuiltinOperator_SQRT: - loadSqrt(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SQRT); return; case BuiltinOperator::BuiltinOperator_SQUARED_DIFFERENCE: loadSquaredDifference(op, subg); return; case BuiltinOperator::BuiltinOperator_TANH: - loadTanh(op, subg); + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::TANH, 1.f, + 1.f); return; case BuiltinOperator::BuiltinOperator_TRANSPOSE: loadTranspose(op, subg); return; case BuiltinOperator::BuiltinOperator_MEAN: - loadReduce(op, subg, ir::operation::Reduce::ReduceType::MEAN); + loadReduce<ir::operation::Reduce::ReduceType::MEAN>(op, subg); return; case BuiltinOperator::BuiltinOperator_REDUCE_ANY: - loadReduce(op, subg, ir::operation::Reduce::ReduceType::ANY); + loadReduce<ir::operation::Reduce::ReduceType::ANY>(op, subg); return; case BuiltinOperator::BuiltinOperator_REDUCE_MAX: - loadReduce(op, subg, ir::operation::Reduce::ReduceType::MAX); + loadReduce<ir::operation::Reduce::ReduceType::MAX>(op, subg); return; case BuiltinOperator::BuiltinOperator_REVERSE_V2: loadReverseV2(op, subg); return; case BuiltinOperator::BuiltinOperator_PAD: + case BuiltinOperator::BuiltinOperator_PADV2: loadPad(op, subg); return; case BuiltinOperator::BuiltinOperator_LOGISTIC: - loadLogistic(op, subg); + loadElementwiseActivation(op, subg, ir::operation::ElementwiseActivation::Type::LOGISTIC); return; case BuiltinOperator::BuiltinOperator_EXP: - loadExp(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::EXP); return; case BuiltinOperator::BuiltinOperator_EXPAND_DIMS: loadExpandDims(op, subg); @@ -1942,7 +1823,7 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadBatchToSpaceND(op, subg); return; case BuiltinOperator::BuiltinOperator_SUM: - loadReduce(op, subg, ir::operation::Reduce::ReduceType::SUM); + loadReduce<ir::operation::Reduce::ReduceType::SUM>(op, subg); return; case BuiltinOperator::BuiltinOperator_CUSTOM: loadCustom(op, subg); @@ -1969,13 +1850,13 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadUnpack(op, subg); return; case BuiltinOperator::BuiltinOperator_MINIMUM: - loadMinimum(op, subg); + loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MIN>(op, subg); return; case BuiltinOperator::BuiltinOperator_MAXIMUM: - loadMaximum(op, subg); + loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::MAX>(op, subg); return; case BuiltinOperator::BuiltinOperator_CAST: - loadCast(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::CAST); return; case BuiltinOperator::BuiltinOperator_EQUAL: case BuiltinOperator::BuiltinOperator_NOT_EQUAL: @@ -1989,19 +1870,19 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadOneHot(op, subg); return; case BuiltinOperator::BuiltinOperator_ABS: - loadAbs(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ABS); return; case BuiltinOperator::BuiltinOperator_COS: - loadCos(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::COS); return; case BuiltinOperator::BuiltinOperator_SIN: - loadSin(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::SIN); return; case BuiltinOperator::BuiltinOperator_SHAPE: loadShape(op, subg); return; case BuiltinOperator::BuiltinOperator_REDUCE_PROD: - 
loadReduce(op, subg, ir::operation::Reduce::ReduceType::PROD); + loadReduce<ir::operation::Reduce::ReduceType::PROD>(op, subg); return; case BuiltinOperator::BuiltinOperator_IF: loadIf(op, subg); @@ -2010,31 +1891,32 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadWhile(op, subg); return; case BuiltinOperator::BuiltinOperator_NEG: - loadNeg(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::NEG); return; case BuiltinOperator::BuiltinOperator_ARG_MAX: loadArgMax(op, subg); return; case BuiltinOperator::BuiltinOperator_LOG: - loadLog(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOG); return; case BuiltinOperator::BuiltinOperator_ROUND: - loadRound(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ROUND); return; case BuiltinOperator::BuiltinOperator_POW: loadPow(op, subg); return; case BuiltinOperator::BuiltinOperator_LOGICAL_NOT: - loadLogicalNot(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::LOGICAL_NOT); return; case BuiltinOperator::BuiltinOperator_LOGICAL_OR: - loadLogicalOr(op, subg); + loadElementwiseBinary<ir::operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR>( + op, subg); return; case BuiltinOperator::BuiltinOperator_FILL: loadFill(op, subg); return; case BuiltinOperator::BuiltinOperator_ZEROS_LIKE: - loadZerosLike(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ZEROS_LIKE); return; case BuiltinOperator::BuiltinOperator_TILE: loadTile(op, subg); @@ -2049,11 +1931,20 @@ void BaseLoader<LoaderDomain, SpecificLoader>::loadOperation(const Operator *op, loadLogSoftmax(op, subg); return; case BuiltinOperator::BuiltinOperator_QUANTIZE: - loadQuantize(op, subg); + loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::QUANTIZE); return; case BuiltinOperator::BuiltinOperator_SPACE_TO_DEPTH: loadSpaceToDepth(op, subg); return; + case BuiltinOperator::BuiltinOperator_L2_NORMALIZATION: + loadL2Normalization(op, subg); + break; + case BuiltinOperator::BuiltinOperator_LEAKY_RELU: + loadLeakyRelu(op, subg); + return; + case BuiltinOperator::BuiltinOperator_RANK: + loadRank(op, subg); + return; default: throw std::runtime_error( std::string("Unsupported operation: ").append(EnumNameBuiltinOperator(builtin_op))); diff --git a/runtime/onert/frontend/circle/src/circle_loader.cc b/runtime/onert/frontend/circle/src/circle_loader.cc index 96dd4698a..92a9ee7a5 100644 --- a/runtime/onert/frontend/circle/src/circle_loader.cc +++ b/runtime/onert/frontend/circle/src/circle_loader.cc @@ -103,12 +103,14 @@ public: // Set inputs for (const std::int32_t input_ind : *circle_subg->inputs()) { - subg->addInput(tensorIdxToOperandIdx(input_ind)); + subg->addInput(tensorIdxToOperandIdx(input_ind), + _tensor_names.at(_tensor_to_operand[input_ind])); } // Set outputs for (const std::int32_t output_ind : *circle_subg->outputs()) { - subg->addOutput(tensorIdxToOperandIdx(output_ind)); + subg->addOutput(tensorIdxToOperandIdx(output_ind), + _tensor_names.at(_tensor_to_operand[output_ind])); } // Create operations for (const auto *op : *circle_subg->operators()) diff --git a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc index 8ff6cbbfd..8e3d83db4 100644 --- a/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc +++ b/runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc @@ -83,6 +83,189 @@ uint32_t 
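// The circle loader above now registers each subgraph input and output together with its
// tensor name (the new two-argument addInput/addOutput overloads); this reading assumes
// _tensor_to_operand maps flatbuffer tensor indices to IR operand indices and
// _tensor_names maps operand indices to names. The tflite loader receives the identical
// change near the end of this patch.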
getUint32Scalar(Operands &operands, const OperandIndex index) } OperationFactory::Generator +getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivation::Type op_type, + float alpha = 0.f, float beta = 0.f) +{ + return [op_type, alpha, beta](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 1); + assert(init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::ElementwiseActivation::Param param; + param.op_type = op_type; + param.alpha = alpha; + param.beta = beta; + + return new operation::ElementwiseActivation{inputs, outputs, param}; + }; +} + +OperationFactory::Generator getElementwiseBinaryGenerator( + const onert::ir::operation::ElementwiseBinary::ElementwiseBinaryType op_type) +{ + return [op_type](const OperationFactory::Param &init_param, Operands &) { + assert(init_param.input_count == 2); + assert(init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> Lefthand side operand + // 1 -> Righthand side operand + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::ElementwiseBinary::Param param; + param.op_type = op_type; + + return new operation::ElementwiseBinary{inputs, outputs, param}; + }; +} + +OperationFactory::Generator +getElementwiseUnaryGenerator(const onert::ir::operation::ElementwiseUnary::Type op_type) +{ + return [op_type](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 1); + assert(init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::ElementwiseUnary::Param param; + param.op_type = op_type; + + if (op_type == operation::ElementwiseUnary::Type::CAST) + { + // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's + // input/output + if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM) + { + replaceDataType(operands, inputs.at(0), DataType::UINT8); + } + if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM) + { + replaceDataType(operands, outputs.at(0), DataType::UINT8); + } + } + + return new operation::ElementwiseUnary{inputs, outputs, param}; + }; +} + +OperationFactory::Generator +getBinaryArithmeticGenerator(const onert::ir::operation::BinaryArithmetic::ArithmeticType op_type) +{ + return [op_type](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 3); + assert(init_param.output_count == 1); + + // Each input should be interpreted as follows: + // + // 0 -> Lefthand side operand + // 1 -> Righthand side operand + + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::BinaryArithmetic::Param param; + param.arithmetic_type = op_type; + const auto activation_index = OperandIndex{init_param.inputs[2]}; + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + + return new operation::BinaryArithmetic{inputs, outputs, param}; + }; +} + +OperationFactory::Generator +getPool2DGenerator(const 
onert::ir::operation::Pool2D::PoolType pool_type) +{ + return [pool_type](const OperationFactory::Param &init_param, Operands &operands) { + assert(init_param.input_count == 7 || init_param.input_count == 10); + assert(init_param.output_count == 1); + + // In common + // 0 -> IFM Tensor Index + OperandIndexSequence inputs{init_param.inputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; + + operation::Pool2D::Param param; + param.op_type = pool_type; + if (init_param.input_count == 7) // support implicit padding + { + // Each input should be interpreted as follows: + // + // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index + // 2 -> Horizontal (over width) Stride Index + // 3 -> Vertical (over height) Stride Index + // 4 -> Filter Width Index + // 5 -> Filter Height Index + // 6 -> FuseCode (activation) Index + + const auto padding_index = OperandIndex{init_param.inputs[1]}; + const auto hstride_index = OperandIndex{init_param.inputs[2]}; + const auto vstride_index = OperandIndex{init_param.inputs[3]}; + const auto kw_index = OperandIndex{init_param.inputs[4]}; + const auto kh_index = OperandIndex{init_param.inputs[5]}; + const auto activation_index = OperandIndex{init_param.inputs[6]}; + + param.padding.type = + NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.kw = getUint32Scalar(operands, kw_index); + param.kh = operands.at(kh_index).asScalar<uint32_t>(); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + else // support explicit padding + { + // Each input should be interpreted as follows: + // + // 1 -> Padding_left index + // 2 -> Padding_right index + // 3 -> Padding_top index + // 4 -> Padding_bottom index + // 5 -> Horizontal (over width) Stride Index + // 6 -> Vertical (over height) Stride Index + // 7 -> Filter Width Index + // 8 -> Filter Height Index + // 9 -> FuseCode (activation) Index + + const auto padding_left_index = OperandIndex{init_param.inputs[1]}; + const auto padding_right_index = OperandIndex{init_param.inputs[2]}; + const auto padding_top_index = OperandIndex{init_param.inputs[3]}; + const auto padding_bottom_index = OperandIndex{init_param.inputs[4]}; + const auto hstride_index = OperandIndex{init_param.inputs[5]}; + const auto vstride_index = OperandIndex{init_param.inputs[6]}; + const auto kw_index = OperandIndex{init_param.inputs[7]}; + const auto kh_index = OperandIndex{init_param.inputs[8]}; + const auto activation_index = OperandIndex{init_param.inputs[9]}; + + param.padding.type = PaddingType::EXPLICIT; + param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, + padding_top_index, padding_bottom_index); + param.stride = makeStride(operands, hstride_index, vstride_index); + param.kw = getUint32Scalar(operands, kw_index); + param.kh = getUint32Scalar(operands, kh_index); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + return new operation::Pool2D{inputs, outputs, param}; + }; +} + +OperationFactory::Generator getReduceGenerator(const onert::ir::operation::Reduce::ReduceType reduce_type) { return [reduce_type](const OperationFactory::Param &init_param, Operands &operands) { @@ -133,79 +316,24 @@ Operation *createSimpleBinaryOp(const OperationFactory::Param &init_param, Opera return new T{inputs, outputs}; } -// A generator function for 
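// All pooling entries in the factory now share the generator above; as registered later
// in this file:
//   _map[ANEURALNETWORKS_MAX_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::MAX);
//   _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::AVG);
//   _map[ANEURALNETWORKS_L2_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::L2);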
binary ops with no params -template <typename T> -Operation *createPool2DOp(const OperationFactory::Param &init_param, Operands &operands) +OperationFactory::Generator getComparisonGenerator(operation::Comparison::ComparisonType type) { - assert(init_param.input_count == 7 || init_param.input_count == 10); - assert(init_param.output_count == 1); + return [type](const OperationFactory::Param &init_param, Operands &) -> Operation * { + assert(init_param.input_count == 2 && init_param.output_count == 1); - // In common - // 0 -> IFM Tensor Index - OperandIndexSequence inputs{init_param.inputs[0]}; - OperandIndexSequence outputs{init_param.outputs[0]}; + OperandIndexSequence outputs{init_param.outputs[0]}; - typename T::Param param; - if (init_param.input_count == 7) // support implicit padding - { // Each input should be interpreted as follows: // - // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index - // 2 -> Horizontal (over width) Stride Index - // 3 -> Vertial (over height) Stride Index - // 4 -> Filter Width Index - // 5 -> Filter Height Index - // 6 -> FuseCode (activation) Index - - const auto padding_index = OperandIndex{init_param.inputs[1]}; - const auto hstride_index = OperandIndex{init_param.inputs[2]}; - const auto vstride_index = OperandIndex{init_param.inputs[3]}; - const auto kw_index = OperandIndex{init_param.inputs[4]}; - const auto kh_index = OperandIndex{init_param.inputs[5]}; - const auto activation_index = OperandIndex{init_param.inputs[6]}; + // 0 -> input0 Tensor Index + // 1 -> input1 Tensor Index + OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - param.padding.type = - NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); - param.stride = makeStride(operands, hstride_index, vstride_index); - param.kw = getUint32Scalar(operands, kw_index); - param.kh = operands.at(kh_index).asScalar<uint32_t>(); - param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); - } - else // support explicit padding - { - // Each input should be interpreted as follows: - // - // 1 -> Padding_left index - // 2 -> Padding_right index - // 3 -> Padding_top index - // 4 -> Padding_bottom index - // 5 -> Horizontal (over width) Stride Index - // 6 -> Vertial (over height) Stride Index - // 7 -> Filter Width Index - // 8 -> Filter Height Index - // 9 -> FuseCode (activation) Index - - const auto padding_left_index = OperandIndex{init_param.inputs[1]}; - const auto padding_right_index = OperandIndex{init_param.inputs[2]}; - const auto padding_top_index = OperandIndex{init_param.inputs[3]}; - const auto padding_bottom_index = OperandIndex{init_param.inputs[4]}; - const auto hstride_index = OperandIndex{init_param.inputs[5]}; - const auto vstride_index = OperandIndex{init_param.inputs[6]}; - const auto kw_index = OperandIndex{init_param.inputs[7]}; - const auto kh_index = OperandIndex{init_param.inputs[8]}; - const auto activation_index = OperandIndex{init_param.inputs[9]}; - - param.padding.type = PaddingType::EXPLICIT; - param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, - padding_top_index, padding_bottom_index); - param.stride = makeStride(operands, hstride_index, vstride_index); - param.kw = getUint32Scalar(operands, kw_index); - param.kh = getUint32Scalar(operands, kh_index); - param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); - } - return new T{inputs, 
outputs, param}; + operation::Comparison::Param param; + param.comparison_type = type; + + return new operation::Comparison{inputs, outputs, param}; + }; } // namespace @@ -295,9 +423,9 @@ OperationFactory::OperationFactory() return new operation::DepthwiseConv2D{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_MAX_POOL_2D] = createPool2DOp<operation::MaxPool2D>; + _map[ANEURALNETWORKS_MAX_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::MAX); - _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = createPool2DOp<operation::AvgPool2D>; + _map[ANEURALNETWORKS_AVERAGE_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::AVG); _map[ANEURALNETWORKS_CONCATENATION] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -383,27 +511,8 @@ OperationFactory::OperationFactory() return new operation::Softmax{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_CAST] = [](const OperationFactory::Param &init_param, Operands &operands) { - assert(init_param.input_count == 1 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // 0 -> input Tensor Index - OperandIndexSequence inputs{init_param.inputs[0]}; - - // NNAPI uses QUANT_UINT8_ASYMM to represent UINT8 type for ANEURALNETWORKS_CAST's input/output - if (operands.at(inputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM) - { - replaceDataType(operands, inputs.at(0), DataType::UINT8); - } - if (operands.at(outputs.at(0)).typeInfo().type() == DataType::QUANT_UINT8_ASYMM) - { - replaceDataType(operands, outputs.at(0), DataType::UINT8); - } - - return new operation::Cast{inputs, outputs}; - }; + _map[ANEURALNETWORKS_CAST] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST); // ANEURALNETWORKS_CAST_EX is deprecated // TODO Remove ANEURALNETWORKS_CAST_EX @@ -416,7 +525,8 @@ OperationFactory::OperationFactory() // inputCount is either 7 or 10 according to NN API specification. 
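// (This revision also accepts 13 inputs: the explicit-padding form plus dilation; see the
// widened assert and the input_count == 13 branch below. Per the NN API specification,
// input 10 is the optional NCHW data-layout flag, which this loader does not read.)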
// - Padding is implicit when inputCount is 7 // - Padding is explicit when inputCount is 10 - assert(init_param.input_count == 7 || init_param.input_count == 10); + assert(init_param.input_count == 7 || init_param.input_count == 10 || + init_param.input_count == 13); assert(init_param.output_count == 1); // 0 -> IFM Tensor Index @@ -427,7 +537,6 @@ OperationFactory::OperationFactory() OperandIndexSequence outputs{init_param.outputs[0]}; Conv2D::Param param; - if (init_param.input_count == 7) // support implicit padding { // Each input should be interpreted as follows: @@ -445,6 +554,10 @@ OperationFactory::OperationFactory() param.padding.type = NNAPIConvert::getPaddingType(operands.at(padding_index).asScalar<PaddingCode>()); param.stride = makeStride(operands, hstride_index, vstride_index); + + param.dilation.width_factor = 1; + param.dilation.height_factor = 1; + param.activation = NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } @@ -472,34 +585,62 @@ OperationFactory::OperationFactory() param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, padding_top_index, padding_bottom_index); param.stride = makeStride(operands, hstride_index, vstride_index); + + param.dilation.width_factor = 1; + param.dilation.height_factor = 1; + param.activation = NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); } + else if (init_param.input_count == 13) // support dilation + { + // Each input should be interpreted as follows: + // + // 3 -> Padding_left Index + // 4 -> Padding_right Index + // 5 -> Padding_top Index + // 6 -> Padding_bottom Index + // 7 -> Stride (width) Index + // 8 -> Stride (height) Index + // 9 -> Activation Index + // 11 -> Dilation (width_factor) Index + // 12 -> Dilation (height_factor) Index - return new Conv2D{inputs, outputs, param}; - }; - - _map[ANEURALNETWORKS_ADD] = [](const OperationFactory::Param &init_param, Operands &operands) { - assert(init_param.input_count == 3); - assert(init_param.output_count == 1); + const auto padding_left_index = OperandIndex{init_param.inputs[3]}; + const auto padding_right_index = OperandIndex{init_param.inputs[4]}; + const auto padding_top_index = OperandIndex{init_param.inputs[5]}; + const auto padding_bottom_index = OperandIndex{init_param.inputs[6]}; + const auto hstride_index = OperandIndex{init_param.inputs[7]}; + const auto vstride_index = OperandIndex{init_param.inputs[8]}; + const auto activation_index = OperandIndex{init_param.inputs[9]}; + const auto width_factor_index = OperandIndex{init_param.inputs[11]}; + const auto height_factor_index = OperandIndex{init_param.inputs[12]}; - // Each input should be interpreted as follows: - // - // 0 -> Lefthand side operand - // 1 -> Righthand side operand + param.padding.type = PaddingType::EXPLICIT; + param.padding.param = makeExplicitPadding(operands, padding_left_index, padding_right_index, + padding_top_index, padding_bottom_index); + param.stride = makeStride(operands, hstride_index, vstride_index); - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - OperandIndexSequence outputs{init_param.outputs[0]}; + auto width_factor = operands.at(width_factor_index).asScalar<int32_t>(); + auto height_factor = operands.at(height_factor_index).asScalar<int32_t>(); - operation::Add::Param param; + param.dilation.width_factor = width_factor; + param.dilation.height_factor = height_factor; - const auto activation_index = OperandIndex{init_param.inputs[2]}; - 
param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + param.activation = + NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); + } + else + { + throw std::runtime_error{"Conv2D: unsupported input operand count"}; + } - return new operation::Add{inputs, outputs, param}; + return new Conv2D{inputs, outputs, param}; }; + _map[ANEURALNETWORKS_ADD] = + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::ADD); + _map[ANEURALNETWORKS_ADDV2_EX] = _map[ANEURALNETWORKS_ADD]; _map[ANEURALNETWORKS_REDUCE_SUM] = @@ -509,26 +650,8 @@ OperationFactory::OperationFactory() // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM]; - _map[ANEURALNETWORKS_SUB] = [](const OperationFactory::Param &init_param, Operands &operands) { - assert(init_param.input_count == 3); - assert(init_param.output_count == 1); - - // Each input should be interpreted as follows: - // - // 0 -> Lefthand side operand - // 1 -> Righthand side operand - - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - OperandIndexSequence outputs{init_param.outputs[0]}; - - operation::Sub::Param param; - - const auto activation_index = OperandIndex{init_param.inputs[2]}; - param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); - - return new operation::Sub{inputs, outputs, param}; - }; + _map[ANEURALNETWORKS_SUB] = + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB); _map[ANEURALNETWORKS_SLICE] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -611,27 +734,8 @@ OperationFactory::OperationFactory() return new operation::Transpose{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_MUL] = [](const OperationFactory::Param &init_param, Operands &operands) { - assert(init_param.input_count == 3 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> LHS Tensor Index - // 1 -> RHS Tensor Index - // 2 -> Activation Index - - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Mul::Param param; - - const auto activation_index = OperandIndex{init_param.inputs[2]}; - param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); - - return new operation::Mul{inputs, outputs, param}; - }; + _map[ANEURALNETWORKS_MUL] = + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::MUL); _map[ANEURALNETWORKS_SQUEEZE] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -672,34 +776,18 @@ OperationFactory::OperationFactory() return new operation::Squeeze{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_TANH] = CreateSimpleUnaryOp<operation::Tanh>; + _map[ANEURALNETWORKS_TANH] = getElementwiseActivationGenerator( + onert::ir::operation::ElementwiseActivation::Type::TANH, 1.f, 1.f); - _map[ANEURALNETWORKS_LOG] = CreateSimpleUnaryOp<operation::Log>; + _map[ANEURALNETWORKS_LOG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOG); - _map[ANEURALNETWORKS_LOGISTIC] = CreateSimpleUnaryOp<operation::Logistic>; + _map[ANEURALNETWORKS_LOGISTIC] = getElementwiseActivationGenerator( + onert::ir::operation::ElementwiseActivation::Type::LOGISTIC); - 
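// A note on the (alpha, beta) arguments fed to getElementwiseActivationGenerator here and
// below, read from the onert IR rather than stated in this patch: for TANH the kernel
// computes alpha * tanh(beta * x), so (1.f, 1.f) is plain tanh; for the RELU family alpha
// is the upper clamp and beta the lower, giving RELU (infinity, 0.f), RELU6 (6.f, 0.f)
// and RELU1 (1.f, -1.f).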
_map[ANEURALNETWORKS_DIV] = [](const OperationFactory::Param &init_param, Operands &operands) { - assert(init_param.input_count == 3 && init_param.output_count == 1); + _map[ANEURALNETWORKS_DIV] = + getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::DIV); - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> LHS Tensor Index - // 1 -> RHS Tensor Index - // 2 -> Activation Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Div::Param param; - - const auto activation_index = OperandIndex{init_param.inputs[2]}; - param.activation = - NNAPIConvert::getFusedActivation(operands.at(activation_index).asScalar<FuseCode>()); - - return new operation::Div{inputs, outputs, param}; - }; - - _map[ANEURALNETWORKS_EXP] = CreateSimpleUnaryOp<operation::Exp>; + _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP); // ANEURALNETWORKS_EXP_EX is deprecated // TODO Remove ANEURALNETWORKS_EXP_EX @@ -710,39 +798,17 @@ OperationFactory::OperationFactory() // 1 -> Axis Tensor Index _map[ANEURALNETWORKS_EXPAND_DIMS] = createSimpleBinaryOp<operation::ExpandDims>; - _map[ANEURALNETWORKS_GREATER] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::Greater; - - return new operation::Comparison{inputs, outputs, param}; - }; - - _map[ANEURALNETWORKS_GREATER_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual; - - return new operation::Comparison{inputs, outputs, param}; - }; + _map[ANEURALNETWORKS_GREATER] = + getComparisonGenerator(operation::Comparison::ComparisonType::Greater); + _map[ANEURALNETWORKS_GREATER_EQUAL] = + getComparisonGenerator(operation::Comparison::ComparisonType::GreaterEqual); + _map[ANEURALNETWORKS_LESS] = getComparisonGenerator(operation::Comparison::ComparisonType::Less); + _map[ANEURALNETWORKS_LESS_EQUAL] = + getComparisonGenerator(operation::Comparison::ComparisonType::LessEqual); + _map[ANEURALNETWORKS_NOT_EQUAL] = + getComparisonGenerator(operation::Comparison::ComparisonType::NotEqual); + _map[ANEURALNETWORKS_EQUAL] = + getComparisonGenerator(operation::Comparison::ComparisonType::Equal); // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX @@ -767,40 +833,6 @@ OperationFactory::OperationFactory() return new operation::Comparison{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_LESS] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input 
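// getComparisonGenerator (defined near the top of this file) now serves GREATER,
// GREATER_EQUAL, LESS, LESS_EQUAL, NOT_EQUAL and EQUAL above; only the deprecated *_EX
// entries keep hand-written lambdas, apparently because they carry extra operand
// retyping (BOOL8 handling) that the shared generator does not perform.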
should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::Less; - - return new operation::Comparison{inputs, outputs, param}; - }; - - _map[ANEURALNETWORKS_LESS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::LessEqual; - - return new operation::Comparison{inputs, outputs, param}; - }; - // ANEURALNETWORKS_LESS_EX is deprecated // TODO Remove ANEURALNETWORKS_LESS_EX _map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param, @@ -837,23 +869,6 @@ OperationFactory::OperationFactory() // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX _map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX]; - _map[ANEURALNETWORKS_NOT_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input1 Tensor Index - // 1 -> input2 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::NotEqual; - - return new operation::Comparison{inputs, outputs, param}; - }; - // ANEURALNETWORKS_NOT_EQUAL_EX is deprecated // TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX _map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param, @@ -877,7 +892,8 @@ OperationFactory::OperationFactory() return new operation::Comparison{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_LOGICAL_AND] = createSimpleBinaryOp<operation::LogicalAnd>; + _map[ANEURALNETWORKS_LOGICAL_AND] = getElementwiseBinaryGenerator( + operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND); // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX @@ -898,10 +914,14 @@ OperationFactory::OperationFactory() replaceDataType(operands, inputs.at(1), DataType::BOOL8); replaceDataType(operands, outputs.at(0), DataType::BOOL8); - return new operation::LogicalAnd{inputs, outputs}; + operation::ElementwiseBinary::Param param; + param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND; + + return new operation::ElementwiseBinary{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_RSQRT] = CreateSimpleUnaryOp<operation::RSQRT>; + _map[ANEURALNETWORKS_RSQRT] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT); _map[ANEURALNETWORKS_SELECT] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 3 && init_param.output_count == 1); @@ -937,7 +957,9 @@ OperationFactory::OperationFactory() // TODO Remove ANEURALNETWORKS_RSQRT_EX _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT]; - _map[ANEURALNETWORKS_RELU] = CreateSimpleUnaryOp<operation::ReLU>; + _map[ANEURALNETWORKS_RELU] = + 
getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU, + onert::ir::operation::ElementwiseActivation::infinity, 0); _map[ANEURALNETWORKS_RESIZE_BILINEAR] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -960,9 +982,11 @@ OperationFactory::OperationFactory() return new operation::ResizeBilinear{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_RELU1] = CreateSimpleUnaryOp<operation::ReLU1>; + _map[ANEURALNETWORKS_RELU1] = getElementwiseActivationGenerator( + onert::ir::operation::ElementwiseActivation::Type::RELU, 1.f, -1.f); - _map[ANEURALNETWORKS_RELU6] = CreateSimpleUnaryOp<operation::ReLU6>; + _map[ANEURALNETWORKS_RELU6] = getElementwiseActivationGenerator( + onert::ir::operation::ElementwiseActivation::Type::RELU, 6.f, 0.f); _map[ANEURALNETWORKS_REVERSE_EX] = [](const OperationFactory::Param &init_param, Operands &) { assert(init_param.input_count == 2 && init_param.output_count == 1); @@ -1009,17 +1033,8 @@ OperationFactory::OperationFactory() return new operation::RNN{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_FLOOR] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 1 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // 0 -> input Tensor Index - OperandIndexSequence inputs{init_param.inputs[0]}; - - return new operation::Floor{inputs, outputs}; - }; + _map[ANEURALNETWORKS_FLOOR] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::FLOOR); _map[ANEURALNETWORKS_SPACE_TO_BATCH_ND] = [](const OperationFactory::Param &init_param, Operands &) { @@ -1059,7 +1074,7 @@ OperationFactory::OperationFactory() return new operation::SpaceToDepth{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_L2_POOL_2D] = createPool2DOp<operation::L2Pool2D>; + _map[ANEURALNETWORKS_L2_POOL_2D] = getPool2DGenerator(operation::Pool2D::PoolType::L2); _map[ANEURALNETWORKS_EMBEDDING_LOOKUP] = [](const OperationFactory::Param &init_param, Operands &) { @@ -1157,35 +1172,15 @@ OperationFactory::OperationFactory() return new operation::TransposeConv{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_SQRT] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 1 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // 0 -> input Tensor Index - - OperandIndexSequence inputs{init_param.inputs[0]}; - return new operation::SQRT{inputs, outputs}; - }; + _map[ANEURALNETWORKS_SQRT] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT); // ANEURALNETWORKS_SQRT_EX is deprecated // TODO Remove ANEURALNETWORKS_SQRT_EX _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT]; - _map[ANEURALNETWORKS_LOGICAL_OR] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - return new operation::LogicalOr{inputs, outputs}; - }; + _map[ANEURALNETWORKS_LOGICAL_OR] = getElementwiseBinaryGenerator( + operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR); // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated // TODO Remove 
ANEURALNETWORKS_LOGICAL_OR_EX @@ -1206,10 +1201,14 @@ OperationFactory::OperationFactory() replaceDataType(operands, inputs.at(1), DataType::BOOL8); replaceDataType(operands, outputs.at(0), DataType::BOOL8); - return new operation::LogicalOr{inputs, outputs}; + operation::ElementwiseBinary::Param param; + param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR; + + return new operation::ElementwiseBinary{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_LOGICAL_NOT] = CreateSimpleUnaryOp<operation::LogicalNot>; + _map[ANEURALNETWORKS_LOGICAL_NOT] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT); // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX @@ -1228,7 +1227,10 @@ OperationFactory::OperationFactory() replaceDataType(operands, inputs.at(0), DataType::BOOL8); replaceDataType(operands, outputs.at(0), DataType::BOOL8); - return new operation::LogicalNot{inputs, outputs}; + operation::ElementwiseUnary::Param param; + param.op_type = operation::ElementwiseUnary::Type::LOGICAL_NOT; + + return new operation::ElementwiseUnary{inputs, outputs, param}; }; _map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) { @@ -1306,23 +1308,6 @@ OperationFactory::OperationFactory() return new operation::LSTM{inputs, outputs, param}; }; - _map[ANEURALNETWORKS_EQUAL] = [](const OperationFactory::Param &init_param, Operands &) { - assert(init_param.input_count == 2 && init_param.output_count == 1); - - OperandIndexSequence outputs{init_param.outputs[0]}; - - // Each input should be interpreted as follows: - // - // 0 -> input0 Tensor Index - // 1 -> input1 Tensor Index - OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]}; - - operation::Comparison::Param param; - param.comparison_type = operation::Comparison::ComparisonType::Equal; - - return new operation::Comparison{inputs, outputs, param}; - }; - // ANEURALNETWORKS_EQUAL_EX is deprecated // TODO Remove ANEURALNETWORKS_EQUAL_EX _map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param, @@ -1409,13 +1394,13 @@ OperationFactory::OperationFactory() // TODO Remove ANEURALNETWORKS_GATHER_EX _map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER]; - _map[ANEURALNETWORKS_NEG] = CreateSimpleUnaryOp<operation::Neg>; + _map[ANEURALNETWORKS_NEG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::NEG); // ANEURALNETWORKS_NEG_EX is deprecated // TODO Remove ANEURALNETWORKS_NEG_EX _map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG]; - _map[ANEURALNETWORKS_ABS] = CreateSimpleUnaryOp<operation::Abs>; + _map[ANEURALNETWORKS_ABS] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ABS); // ANEURALNETWORKS_ABS_EX is deprecated // TODO Remove ANEURALNETWORKS_ABS_EX @@ -1434,6 +1419,8 @@ OperationFactory::OperationFactory() operation::ArgMax::Param param; param.axis = operands.at(OperandIndex{init_param.inputs[1]}).asScalar<std::int32_t>(); + // NNAPI ARGMAX output type is always int32 + param.output_type = DataType::INT32; return new operation::ArgMax{inputs, outputs, param}; }; @@ -1442,7 +1429,8 @@ OperationFactory::OperationFactory() // TODO Remove ANEURALNETWORKS_ARGMAX_EX _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX]; - _map[ANEURALNETWORKS_DEQUANTIZE] = CreateSimpleUnaryOp<operation::Dequantize>; + _map[ANEURALNETWORKS_DEQUANTIZE] = + getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::DEQUANTIZE); _map[ANEURALNETWORKS_MEAN] = 
  _map[ANEURALNETWORKS_MEAN] = [](const OperationFactory::Param &init_param, Operands &operands) {
    assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1600,9 +1588,11 @@ OperationFactory::OperationFactory()

  _map[ANEURALNETWORKS_PAD_V2] = _map[ANEURALNETWORKS_PAD];

-  _map[ANEURALNETWORKS_MINIMUM] = createSimpleBinaryOp<operation::Min>;
+  _map[ANEURALNETWORKS_MINIMUM] =
+      getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MIN);

-  _map[ANEURALNETWORKS_MAXIMUM] = createSimpleBinaryOp<operation::Max>;
+  _map[ANEURALNETWORKS_MAXIMUM] =
+      getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::MAX);

  _map[ANEURALNETWORKS_ONE_HOT_EX] = [](const OperationFactory::Param &init_param,
                                        Operands &operands) {
@@ -1628,23 +1618,10 @@ OperationFactory::OperationFactory()
    return new operation::OneHot{inputs, outputs, param};
  };

-  _map[ANEURALNETWORKS_COS_EX] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence inputs{init_param.inputs[0]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    return new operation::Cos{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_COS_EX] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::COS);

-  _map[ANEURALNETWORKS_SIN] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence inputs{init_param.inputs[0]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    return new operation::Sin{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_SIN] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SIN);

  _map[ANEURALNETWORKS_SHAPE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
    assert(init_param.input_count == 1 && init_param.output_count == 1);
@@ -1658,17 +1635,8 @@ OperationFactory::OperationFactory()
  _map[ANEURALNETWORKS_REDUCE_PROD] =
      getReduceGenerator(onert::ir::operation::Reduce::ReduceType::PROD);

-  _map[ANEURALNETWORKS_ROUND_EX] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    // 0 -> input Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0]};
-
-    return new operation::Round{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_ROUND_EX] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ROUND);

  _map[ANEURALNETWORKS_RANGE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
    assert(init_param.input_count == 3 && init_param.output_count == 1);
@@ -1695,18 +1663,8 @@ OperationFactory::OperationFactory()
  // 1 -> A 1-D tensor, specifying the value
  _map[ANEURALNETWORKS_FILL_EX] = createSimpleBinaryOp<operation::Fill>;

-  _map[ANEURALNETWORKS_ZEROS_LIKE_EX] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    // 0 -> input Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0]};
-
-    return new operation::ZerosLike{inputs, outputs};
-  };
-
+  _map[ANEURALNETWORKS_ZEROS_LIKE_EX] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ZEROS_LIKE);
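MINIMUM and MAXIMUM move to the analogous binary generator. Again a sketch inferred from the call sites (MIN, MAX, and LOGICAL_OR earlier), not the actual helper:

auto getElementwiseBinaryGenerator(const operation::ElementwiseBinary::ElementwiseBinaryType op_type)
{
  // Mirrors getElementwiseUnaryGenerator, but for two-input operations.
  return [op_type](const OperationFactory::Param &init_param, Operands &) -> Operation * {
    assert(init_param.input_count == 2 && init_param.output_count == 1);

    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
    OperandIndexSequence outputs{init_param.outputs[0]};

    operation::ElementwiseBinary::Param param;
    param.op_type = op_type; // MIN, MAX, LOGICAL_OR, ...

    return new operation::ElementwiseBinary{inputs, outputs, param};
  };
}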
  // Each input should be interpreted as follows:
  // 0 -> Input Tensor Index
  // 1 -> Multiple Tensor Index
@@ -1845,14 +1803,8 @@ OperationFactory::OperationFactory()
    return new operation::LogSoftmax{inputs, outputs, param};
  };

-  _map[ANEURALNETWORKS_QUANTIZE] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence inputs{init_param.inputs[0]};
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    return new operation::Quantize{inputs, outputs};
-  };
+  _map[ANEURALNETWORKS_QUANTIZE] =
+      getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::QUANTIZE);
}

Operation *OperationFactory::create(ANeuralNetworksOperationType type,
diff --git a/runtime/onert/frontend/tflite/src/tflite_loader.cc b/runtime/onert/frontend/tflite/src/tflite_loader.cc
index 86c2c6bc7..7eef15717 100644
--- a/runtime/onert/frontend/tflite/src/tflite_loader.cc
+++ b/runtime/onert/frontend/tflite/src/tflite_loader.cc
@@ -90,12 +90,14 @@ public:
    // Set inputs
    for (const std::int32_t input_ind : *tflite_subg->inputs())
    {
-      subg->addInput(tensorIdxToOperandIdx(input_ind));
+      subg->addInput(tensorIdxToOperandIdx(input_ind),
+                     _tensor_names.at(_tensor_to_operand[input_ind]));
    }
    // Set outputs
    for (const std::int32_t output_ind : *tflite_subg->outputs())
    {
-      subg->addOutput(tensorIdxToOperandIdx(output_ind));
+      subg->addOutput(tensorIdxToOperandIdx(output_ind),
+                      _tensor_names.at(_tensor_to_operand[output_ind]));
    }
    // Create operations
    for (const auto *op : *tflite_subg->operators())
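The loader now attaches the original TFLite tensor names to the subgraph's inputs and outputs. The two members used above are not defined in this hunk; a sketch of the assumed bookkeeping behind them:

// Assumed shapes of the two loader members (not shown in this hunk):
//   _tensor_to_operand : TFLite tensor index -> onert operand index
//   _tensor_names      : operand index -> original tensor name
std::unordered_map<int32_t, onert::ir::OperandIndex> _tensor_to_operand;
std::unordered_map<onert::ir::OperandIndex, std::string> _tensor_names;

// Hypothetical fill-in while tensors are loaded; tflite::Tensor::name() is the
// flatbuffers-generated accessor and may be null, since names are optional.
void registerTensorName(int32_t tensor_ind, onert::ir::OperandIndex operand_ind,
                        const tflite::Tensor *tensor)
{
  _tensor_to_operand[tensor_ind] = operand_ind;
  if (tensor->name() != nullptr)
    _tensor_names[operand_ind] = tensor->name()->str();
}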
diff --git a/runtime/onert/test/core/compiler/Scheduler.cc b/runtime/onert/test/core/compiler/Scheduler.cc
index 94f51ddd6..50f3964db 100644
--- a/runtime/onert/test/core/compiler/Scheduler.cc
+++ b/runtime/onert/test/core/compiler/Scheduler.cc
@@ -22,9 +22,7 @@
 #include <ir/TypeInfo.h>
 #include <ir/DataType.h>

-#include <ir/operation/Add.h>
-#include <ir/operation/Sub.h>
-#include <ir/operation/Mul.h>
+#include <ir/operation/BinaryArithmetic.h>
 #include <ir/operation/FullyConnected.h>

 #include <gtest/gtest.h>
@@ -209,8 +207,7 @@ using OIS = OperandIndexSequence;
template <typename NodeT, typename... Types>
OperationIndex create(std::shared_ptr<Graph> graph, Types &&... args)
{
-  typename NodeT::Param op_params{Activation::NONE};
-  auto op = std::make_unique<NodeT>(std::forward<Types>(args)..., op_params);
+  auto op = std::make_unique<NodeT>(std::forward<Types>(args)...);
  auto op_idx = graph->addOperation(std::move(op));
  // For now in scheduler test all operations in tested graphs has same size (for simplicity)
  assert(calcOpSize(graph, op_idx) == OPERATION_SIZE);
@@ -227,17 +224,20 @@ std::shared_ptr<Graph> createStraightGraph()
  auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+  BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);

  // Create sub node
  auto sub_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Sub>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx});
+  BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_out_idx, sub_const_idx}, OIS{sub_out_idx}, sub_op_params);

  // Create mul node
  auto mul_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto mul_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Mul>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx});
+  BinaryArithmetic::Param mul_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{sub_out_idx, mul_const_idx}, OIS{mul_out_idx}, mul_op_params);

  graph->finishBuilding();
  return graph;
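With the hard-coded Activation::NONE removed from create<>, each call site now passes its own Param. The two initialization styles used in this patch are interchangeable, assuming the member order implied by the aggregate initializers above:

// Aggregate initialization, as in the Scheduler tests:
BinaryArithmetic::Param p1{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};

// Field-by-field initialization, as in the executor tests further down:
operation::BinaryArithmetic::Param p2;
p2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
p2.activation = Activation::NONE;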
@@ -261,31 +261,39 @@ std::shared_ptr<Graph> createBranchedGraph()
  auto add_lhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto add_rhs_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto add_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Add>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx});
+  BinaryArithmetic::Param add_op_params{BinaryArithmetic::ArithmeticType::ADD, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_lhs_idx, add_rhs_idx}, OIS{add_out_idx}, add_op_params);

  // Create mul1 node
  auto mul1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto mul1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Mul>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx});
+  BinaryArithmetic::Param mul1_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{add_out_idx, mul1_const_idx}, OIS{mul1_out_idx},
+                           mul1_op_params);

  // Create mul2 node
  auto mul2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto mul2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Mul>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx});
+  BinaryArithmetic::Param mul2_op_params{BinaryArithmetic::ArithmeticType::MUL, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{mul1_out_idx, mul2_const_idx}, OIS{mul2_out_idx},
+                           mul2_op_params);

  // Create fc1 node
  auto fc1_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto fc1_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx});
+  FullyConnected::Param fc1_op_params{Activation::NONE};
+  create<FullyConnected>(graph, OIS{add_out_idx, fc1_const_idx}, OIS{fc1_out_idx}, fc1_op_params);

  // Create fc2 node
  auto fc2_const_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
  auto fc2_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx});
+  FullyConnected::Param fc2_op_params{Activation::NONE};
+  create<FullyConnected>(graph, OIS{fc1_out_idx, fc2_const_idx}, OIS{fc2_out_idx}, fc2_op_params);

-  // Create add2 node
+  // Create sub node
  auto sub_out_idx = graph->addOperand(ir::Shape{OPERAND_ELEMS}, float_op);
-  create<Sub>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx});
+  BinaryArithmetic::Param sub_op_params{BinaryArithmetic::ArithmeticType::SUB, Activation::NONE};
+  create<BinaryArithmetic>(graph, OIS{mul2_out_idx, fc2_out_idx}, OIS{sub_out_idx}, sub_op_params);

  graph->finishBuilding();
  return graph;
diff --git a/runtime/onert/test/core/exec/ExecInstance.cc b/runtime/onert/test/core/exec/ExecInstance.cc
index 0fcf372c3..806b47ecc 100644
--- a/runtime/onert/test/core/exec/ExecInstance.cc
+++ b/runtime/onert/test/core/exec/ExecInstance.cc
@@ -20,7 +20,7 @@
 #include "ir/Graph.h"
 #include "compiler/Compiler.h"
 #include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"

 namespace
 {
@@ -54,16 +54,20 @@ public:
        .at(operand_rhs2)
        .data(std::make_unique<CachedData>(reinterpret_cast<const uint8_t *>(&rhs2_data), 16));
    // 2nd add operations (result2 <= result1 + rhs2)
-    operation::Add::Param param1;
+    operation::BinaryArithmetic::Param param1;
+    param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param1.activation = Activation::NONE;
    auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
    auto output_set1 = OperandIndexSequence{operand_result1};
-    graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
-    operation::Add::Param param2;
+    graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));
+    operation::BinaryArithmetic::Param param2;
+    param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param2.activation = Activation::NONE;
    auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
    auto output_set2 = OperandIndexSequence{operand_result2};
-    graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+    graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));
    // Identify model inputs and outputs
    graph->addInput(operand_lhs);
    graph->addInput(operand_rhs1);
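The same three-line ADD setup now recurs in every executor and interpreter test; a small test-local helper could cut the repetition. A hypothetical sketch, not part of the patch:

// Hypothetical helper; only types introduced by this patch are used.
static operation::BinaryArithmetic::Param
makeBinaryParam(operation::BinaryArithmetic::ArithmeticType type,
                Activation act = Activation::NONE)
{
  operation::BinaryArithmetic::Param param;
  param.arithmetic_type = type;
  param.activation = act;
  return param;
}

// e.g. auto param1 = makeBinaryParam(operation::BinaryArithmetic::ArithmeticType::ADD);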
diff --git a/runtime/onert/test/core/interp/ExecManager.cc b/runtime/onert/test/core/interp/ExecManager.cc
index 2e295ef40..09190bc58 100644
--- a/runtime/onert/test/core/interp/ExecManager.cc
+++ b/runtime/onert/test/core/interp/ExecManager.cc
@@ -21,7 +21,7 @@
 #include "ir/Graph.h"
 #include "interp/InterpExecutor.h"
 #include "exec/Execution.h"
-#include "ir/operation/Add.h"
+#include "ir/operation/BinaryArithmetic.h"

 namespace
 {
@@ -57,11 +57,13 @@ protected:

    // Add operations

-    operation::Add::Param param;
+    operation::BinaryArithmetic::Param param;
+    param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param.activation = Activation::NONE;
    auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
    auto output_set = OperandIndexSequence{operand_result};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));

    // Identify model inputs and outputs

@@ -112,17 +114,21 @@ protected:

    // 2nd add operations (result2 <= result1 + rhs2)

-    operation::Add::Param param1;
+    operation::BinaryArithmetic::Param param1;
+    param1.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param1.activation = Activation::NONE;
    auto input_set1 = OperandIndexSequence{operand_lhs, operand_rhs1};
    auto output_set1 = OperandIndexSequence{operand_result1};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set1, output_set1, param1));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set1, output_set1, param1));

-    operation::Add::Param param2;
+    operation::BinaryArithmetic::Param param2;
+    param2.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param2.activation = Activation::NONE;
    auto input_set2 = OperandIndexSequence{operand_result1, operand_rhs2};
    auto output_set2 = OperandIndexSequence{operand_result2};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set2, output_set2, param2));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set2, output_set2, param2));

    // Identify model inputs and outputs

@@ -170,11 +176,13 @@ protected:

    // Add operations

-    operation::Add::Param param;
+    operation::BinaryArithmetic::Param param;
+    param.arithmetic_type = operation::BinaryArithmetic::ArithmeticType::ADD;
    param.activation = Activation::NONE;
    auto input_set = OperandIndexSequence{operand_lhs, operand_rhs};
    auto output_set = OperandIndexSequence{operand_result};
-    _graph->addOperation(std::make_unique<operation::Add>(input_set, output_set, param));
+    _graph->addOperation(
+        std::make_unique<operation::BinaryArithmetic>(input_set, output_set, param));

    // Identify model inputs and outputs

diff --git a/runtime/onert/test/util/ShapeInference.cc b/runtime/onert/test/util/ShapeInference.cc
index 63a948d7b..aab33fab5 100644
--- a/runtime/onert/test/util/ShapeInference.cc
+++ b/runtime/onert/test/util/ShapeInference.cc
@@ -47,8 +47,9 @@ TEST(ShapeInference, Pool2DNodeSame)
  Stride stride{3, 7};
  Padding padding{PaddingType::SAME};

-  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
-  auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  operation::Pool2D::Param avg_pool_param{
+      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+  auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -56,8 +57,9 @@ TEST(ShapeInference, Pool2DNodeSame)
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);

-  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
-  infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  operation::Pool2D::Param max_pool_param{
+      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+  infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
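All three pooling tests now go through one Param type and one inference entry point; only the PoolType enumerator distinguishes AVG from MAX. Isolated usage, with the exact shapes and parameters from the test above:

// Values taken from Pool2DNodeSame above; PoolType is the only AVG/MAX difference.
Shape in_shape{10, 6, 12, 20}; // NHWC
Stride stride{3, 7};
Padding padding{PaddingType::SAME};

operation::Pool2D::Param pool_param{
    operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
auto out_shape = onert::shape_inference::inferPoolShape(in_shape, pool_param);
// With SAME padding this comes out to {10, 2, 2, 20}, matching the assertions above.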
@@ -72,8 +74,9 @@ TEST(ShapeInference, Pool2DNodeValid)
  Stride stride{3, 7};
  Padding padding{PaddingType::VALID};

-  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
-  auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  operation::Pool2D::Param avg_pool_param{
+      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+  auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -81,8 +84,9 @@ TEST(ShapeInference, Pool2DNodeValid)
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);

-  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
-  infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  operation::Pool2D::Param max_pool_param{
+      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+  infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -98,8 +102,9 @@ TEST(ShapeInference, Pool2DNodeExplicit)
  Stride stride{3, 7};
  Padding padding{4, 3, 2, 1};

-  operation::AvgPool2D::Param avg_pool_param{3, 6, stride, padding, Activation::NONE};
-  auto infered_out_shape = onert::shape_inference::inferAvgPoolShape(in_shape, avg_pool_param);
+  operation::Pool2D::Param avg_pool_param{
+      operation::Pool2D::PoolType::AVG, 3, 6, stride, padding, Activation::NONE};
+  auto infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, avg_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -107,8 +112,9 @@ TEST(ShapeInference, Pool2DNodeExplicit)
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 20);

-  operation::MaxPool2D::Param max_pool_param{3, 6, stride, padding, Activation::NONE};
-  infered_out_shape = onert::shape_inference::inferMaxPoolShape(in_shape, max_pool_param);
+  operation::Pool2D::Param max_pool_param{
+      operation::Pool2D::PoolType::MAX, 3, 6, stride, padding, Activation::NONE};
+  infered_out_shape = onert::shape_inference::inferPoolShape(in_shape, max_pool_param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).N, 10);
@@ -122,7 +128,8 @@ TEST(ShapeInference, Conv2D)
  Shape in_shape{10, 6, 12, 20};
  Shape ker_shape{30, 3, 6, 20};

-  operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE};
+  operation::Conv2D::Param param{Stride{3, 7}, Padding{PaddingType::VALID}, Activation::NONE,
+                                 Dilation{1, 1}};
  auto infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -131,7 +138,8 @@ TEST(ShapeInference, Conv2D)
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 1);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);

-  param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE};
+  param = operation::Conv2D::Param{Stride{3, 7}, Padding{PaddingType::SAME}, Activation::NONE,
+                                   Dilation{1, 1}};
  infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
@@ -140,7 +148,8 @@ TEST(ShapeInference, Conv2D)
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).W, 2);
  ASSERT_EQ(infered_out_shape.asFeature(Layout::NHWC).C, 30);

-  param = operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE};
+  param =
+      operation::Conv2D::Param{Stride{3, 7}, Padding{4, 3, 2, 1}, Activation::NONE, Dilation{1, 1}};
  infered_out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);

  ASSERT_EQ(infered_out_shape.rank(), 4);
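Conv2D::Param gains a fourth member, and every existing call site passes the identity Dilation{1, 1}, which reproduces the old behaviour. A usage sketch with a non-trivial dilation (the values are hypothetical; the API surface is the one exercised by the tests above):

// Hypothetical values; Dilation{1, 1} would give the pre-patch behaviour.
Shape in_shape{10, 6, 12, 20}; // NHWC input
Shape ker_shape{30, 3, 6, 20}; // OHWI kernel

operation::Conv2D::Param param{Stride{1, 1}, Padding{PaddingType::VALID}, Activation::NONE,
                               Dilation{2, 2}}; // dilated 3x6 kernel spans 5x11 input taps
auto out_shape = onert::shape_inference::inferConv2DShape(in_shape, ker_shape, param);
// With VALID padding and stride 1 this would yield {10, 2, 2, 30}.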