Diffstat (limited to 'runtime/neurun/backend/acl_neon/KernelGenerator.cc')
-rw-r--r--  runtime/neurun/backend/acl_neon/KernelGenerator.cc  2152
1 file changed, 0 insertions(+), 2152 deletions(-)
diff --git a/runtime/neurun/backend/acl_neon/KernelGenerator.cc b/runtime/neurun/backend/acl_neon/KernelGenerator.cc
deleted file mode 100644
index 85c6a0633..000000000
--- a/runtime/neurun/backend/acl_neon/KernelGenerator.cc
+++ /dev/null
@@ -1,2152 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "KernelGenerator.h"
-
-#include <arm_compute/runtime/NEON/NEFunctions.h> // Include all ARM Compute NEON functions
-#include <arm_compute/runtime/NEON/NEFunctionsEx.h> // Include all ARM Compute EX NEON functions
-
-#include <Convert.h>
-#include <Swizzle.h>
-
-#include "util/Padding.h"
-#include "ir/Index.h"
-#include "ir/DataType.h"
-#include "ir/InternalType.h"
-#include "compiler/IExecutionBuilder.h"
-#include "exec/NopFunction.h"
-#include "util/logging.h"
-#include "util/Utils.h"
-
-using ::neurun::compiler::IExecutionBuilder;
-
-namespace neurun
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-using ::neurun::backend::acl_common::asAclFunction;
-
-//
-// ActivationBuilder
-//
-class ActivationBuilder
-{
-public:
- ActivationBuilder(IExecutionBuilder &builder) : _builder(builder)
- {
- // DO NOTHING
- }
-
-private:
- void appendReLU(::arm_compute::ITensor *ifm_alloc);
- void appendReLU1(::arm_compute::ITensor *ifm_alloc);
- void appendReLU6(::arm_compute::ITensor *ifm_alloc);
-
-public:
- void append(ir::Activation act, ::arm_compute::ITensor *ifm_alloc);
-
-private:
- IExecutionBuilder &_builder;
-};
-
-void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc)
-{
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc, nullptr, act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _builder.append(std::move(acl_fn));
-}
-
-void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc)
-{
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc, nullptr, act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _builder.append(std::move(acl_fn));
-}
-
-void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc)
-{
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc, nullptr, act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _builder.append(std::move(acl_fn));
-}
-
-void ActivationBuilder::append(ir::Activation act, ::arm_compute::ITensor *ifm_alloc)
-{
- switch (act)
- {
- case ir::Activation::NONE:
- {
- // DO NOTHING
- break;
- }
- case ir::Activation::RELU:
- {
- appendReLU(ifm_alloc);
- break;
- }
- case ir::Activation::RELU1:
- {
- appendReLU1(ifm_alloc);
- break;
- }
- case ir::Activation::RELU6:
- {
- appendReLU6(ifm_alloc);
- break;
- }
- default:
- {
- throw std::runtime_error("Not supported, yet");
- }
- }
-}
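-
-// A minimal usage sketch: after appending a kernel that writes to a tensor, a fused
-// activation can be applied in place via
-//   ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-// as the pooling and fully-connected visitors below do.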
-
-//
-// KernelGenerator
-//
-KernelGenerator::KernelGenerator(const ir::Operands &ctx,
- const std::shared_ptr<TensorBuilder> &tensor_builder)
- : _ctx(ctx), _tensor_builder(tensor_builder), _current_subg_layout(ir::Layout::UNKNOWN)
-{
- // DO NOTHING
-}
-
-void KernelGenerator::visit(const ir::OpSequence &op_seq)
-{
- _current_subg_layout = op_seq.getLayout();
- for (const auto &e : op_seq.operations())
- {
- const auto &node = *(e.node);
- _tensor_builder->preVisit(node);
- node.accept(*this);
- _tensor_builder->postVisit(node);
- }
-}
-
-void KernelGenerator::visit(const ir::operation::Abs &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ArgMax &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
-
- const auto ifm_rank = node.param().rank;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto frontend_layout = _current_subg_layout;
- auto backend_layout = ifm_alloc->layout();
-
- int axis_value = node.param().axis;
- if (axis_value < 0)
- {
- axis_value += ifm_rank;
- }
- assert(axis_value >= 0 && axis_value < ifm_rank);
- const auto fixed_axis =
- acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
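-  // e.g. (illustrative) with ifm_rank == 4 and axis == -1, axis_value becomes 3, which
-  // maps to ACL axis 0 when the frontend and backend layouts match, because ACL orders
-  // dimensions in reverse.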
-
-  // NOTE An alternative is ::arm_compute::NEArgMinMaxLayer configured with
-  //      arm_compute::ReductionOperation::ARG_IDX_MAX.
-  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMax>();
-
-  // The output holds indices, so force its data type to U32 for NEArgMax.
-  ofm_alloc->info()->set_data_type(arm_compute::DataType::U32);
-  fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
-
- assert(_ctx.at(block_size_index).isConstant());
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEBatchToSpaceLayer>();
-
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Cast &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NECast>();
-
- auto input_sub_type = _ctx.at(ifm_index).typeInfo().type() == ir::DataType::BOOL8
- ? arm_compute::SubDataType::BOOL
- : arm_compute::SubDataType::NONE;
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), input_sub_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Conv2D &node)
-{
- using ir::operation::Conv2D;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
- const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
- const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
- const auto &ker_shape = _ctx.at(ker_index).shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
-
- const auto stride = node.param().stride;
- const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
- stride, ker_width, ker_height);
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
-
- const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
- const auto act_info = acl_common::asActivationLayerInfo(activation);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEConvolutionLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
- conv_info, ::arm_compute::WeightsInfo(), ::arm_compute::Size2D(1U, 1U), act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
-
- auto block_size = node.param().block_size;
- assert(block_size > 0);
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthToSpaceLayerEx>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), block_size);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
-{
- using ir::operation::DepthwiseConv2D;
-
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
- const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
- const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- // Kernel format is [1, kernel_height, kernel_width, depth_out].
- const auto &ker_shape = _ctx.at(ker_index).shape();
- const auto ker_height = ker_shape.dim(1);
- const auto ker_width = ker_shape.dim(2);
-
- const auto stride = node.param().stride;
- const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
- stride, ker_width, ker_height);
- const auto multiplier = node.param().multiplier;
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
-
- const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
- const auto act_info = acl_common::asActivationLayerInfo(activation);
-
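-  // ACL ships a dedicated 3x3 depthwise kernel, which is typically the faster path,
-  // so dispatch on the kernel size.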
- if (ker_height == 3 && ker_width == 3)
- {
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer3x3>();
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
- }
- else
- {
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
- }
-}
-
-void KernelGenerator::visit(const ir::operation::Dequantize &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDequantizationLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- VERBOSE(MaxPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(MaxPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(MaxPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(MaxPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(MaxPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(MaxPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(MaxPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(MaxPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(MaxPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{::arm_compute::PoolingType::MAX,
- ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
-  _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Mean &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Mean::Input::INPUT)};
- const auto &axes{node.param().axes};
- const auto keep_dims{node.param().keep_dims};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int ifm_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += ifm_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
- }
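-  // e.g. (illustrative) on a rank-4 tensor, axes {-1, 3} normalize to the same ACL
-  // axis and are de-duplicated by the std::set.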
-
- arm_compute::Coordinates fixed_axis;
- for (const auto axis : acl_axes)
- {
- fixed_axis.set(fixed_axis.num_dimensions(), axis);
- }
-
-  // NOTE NEReduceMean has a bug: it does not support the NHWC layout, as its
-  //      intermediate tensors are always NCHW, so NEReduceMeanEx is used instead.
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceMeanEx>();
-
- fn->configure(ifm_alloc->handle(), fixed_axis, keep_dims, ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
-
- const auto kh = node.param().kh;
- const auto kw = node.param().kw;
- const auto stride = node.param().stride;
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- VERBOSE(AvgPool2D) << "IFM_H: " << ifm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "IFM_W: " << ifm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "OFM_H: " << ofm_shape.H << std::endl;
- VERBOSE(AvgPool2D) << "OFM_W: " << ofm_shape.W << std::endl;
- VERBOSE(AvgPool2D) << "KER_H: " << kh << std::endl;
- VERBOSE(AvgPool2D) << "KER_W: " << kw << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_H: " << stride.vertical << std::endl;
- VERBOSE(AvgPool2D) << "STRIDE_W: " << stride.horizontal << std::endl;
- VERBOSE(AvgPool2D) << "PAD(T): " << padding.top << std::endl;
- VERBOSE(AvgPool2D) << "PAD(B): " << padding.bottom << std::endl;
- VERBOSE(AvgPool2D) << "PAD(L): " << padding.left << std::endl;
- VERBOSE(AvgPool2D) << "PAD(R): " << padding.right << std::endl;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::AVG, ::arm_compute::Size2D{kw, kh},
- acl_common::asPadStrideInfo(padding, stride), true /* exclude_padding */};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
-  _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Concat &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- std::vector<ir::OperandIndex> input_indexes;
- for (const auto &input : node.getInputs())
- input_indexes.emplace_back(input);
-
- const auto axis = node.param().axis;
-
-  // If the tensor allocator allocated every input as a subtensor of the output,
-  // the concatenation has effectively already been performed.
- bool canEliminate = true;
- for (auto ifm_ind : input_indexes)
- {
- if (!_tensor_builder->isSubTensorOf(ofm_index, ifm_ind))
- {
- canEliminate = false;
- break;
- }
- }
- if (canEliminate)
- {
-    // The concat was eliminated above, so append a NOP IFunction instead.
- _execution_builder->append(nnfw::cpp14::make_unique<exec::NopFunction>());
- return;
- }
-
- auto output_alloc = _tensor_builder->at(ofm_index).get();
- std::vector<::arm_compute::ITensor *> input_tensors;
- for (const auto &ifm_ind : input_indexes)
- input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
-
- std::unique_ptr<::arm_compute::IFunction> fn;
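-  // A single-input "concatenation" degenerates to a plain copy.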
- if (input_indexes.size() < 2)
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::NECopy>();
- l->configure(input_tensors.at(0), output_alloc->handle());
- fn = std::move(l);
- }
- else
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::NEConcatenateLayer>();
- const auto rank = node.param().rank;
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = output_alloc->layout();
- const auto fixed_axis =
- acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
- l->configure(input_tensors, output_alloc->handle(), fixed_axis);
- fn = std::move(l);
- }
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
- const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEEmbeddingLookup>();
-
- fn->configure(values_alloc->handle(), output_alloc->handle(), lookups_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Floor &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEFloor>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::FullyConnected &node)
-{
- using ir::operation::FullyConnected;
-
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
- const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
- const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
-
- const auto input_rank = _ctx.at(input_index).shape().rank();
-
- const auto output_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 1);
- UNUSED_RELEASE(output_size);
- assert(_ctx.at(bias_index).shape().dim(0) == output_size);
- assert(_ctx.at(weight_index).shape().dim(0) == output_size);
- const auto batch_size =
- _ctx.at(output_index).shape().dim(_ctx.at(output_index).shape().rank() - 2);
- const auto input_size =
- _ctx.at(weight_index).shape().dim(_ctx.at(weight_index).shape().rank() - 1);
-
-  // Check whether the input's shape needs reshaping into rank 2
- bool needs_reshape = false;
- ir::Shape reshape(2);
- if (input_rank == 3 || input_rank == 4)
- {
- const auto &ifm_shape = _ctx.at(input_index).shape();
- auto feature_size = 1;
- for (int i = 0; i < ifm_shape.rank(); ++i)
- {
- feature_size *= ifm_shape.dim(i);
- }
-
- UNUSED_RELEASE(feature_size);
- assert(feature_size == batch_size * input_size);
-
- // for reshaping
- needs_reshape = true;
- reshape.dim(0) = batch_size; /* H */
- reshape.dim(1) = input_size; /* W */
- }
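-  // e.g. (illustrative) for an input of shape (2, 3, 4), a weight of shape (N, 12) and
-  // an output of shape (2, N): batch_size == 2, input_size == 12 and feature_size == 24
-  // == 2 * 12, so the input is reshaped to the rank-2 shape (2, 12).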
-
- const auto activation = node.param().activation;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- const auto input_alloc = _tensor_builder->at(input_index).get();
- const auto weight_alloc = _tensor_builder->at(weight_index).get();
- const auto bias_alloc = _tensor_builder->at(bias_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto acl_layout = output_alloc->handle()->info()->data_layout();
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::NEFullyConnectedReshapingLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- arm_compute::NEFullyConnectedReshapingLayer::KernelType kernel_type =
- _ctx.at(weight_index).isConstant()
- ? arm_compute::NEFullyConnectedReshapingLayer::KernelType::PREPROCESSED_WEIGHTS
- : arm_compute::NEFullyConnectedReshapingLayer::KernelType::GENERAL;
-
- fn->configure(
- input_alloc->handle(), weight_alloc->handle(), bias_alloc->handle(), output_alloc->handle(),
- needs_reshape,
- ::neurun::backend::acl_common::asTensorShape(
- reshape, frontend_layout, ::neurun::backend::acl_common::asRuntimeLayout(acl_layout)),
- kernel_type);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, output_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
-{
- const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
- const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
-
- const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
- const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
- const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hits_alloc = _tensor_builder->at(hits_index).get();
-
- auto lookups_alloc = _tensor_builder->at(lookups_index).get();
- auto keys_alloc = _tensor_builder->at(keys_index).get();
- auto values_alloc = _tensor_builder->at(values_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEHashtableLookup>();
-
- fn->configure(lookups_alloc->handle(), keys_alloc->handle(), values_alloc->handle(),
- output_alloc->handle(), hits_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Gather &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
- const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape();
-
- const auto ifm_rank = node.param().rank;
- const auto axis_raw = node.param().axis;
- const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
-  // ACL orders dimensions in reverse, so convert the axis accordingly
- const int axis = ::neurun::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto indices_alloc = _tensor_builder->at(indices_index).get();
- const auto backend_layout = ofm_alloc->layout();
- UNUSED_RELEASE(backend_layout);
-
-  // NOTE The frontend layout and backend layout must be the same for this operation.
-  //      Otherwise we would have to add an extra stage to permute the output tensor,
-  //      which would not be efficient even if it worked. In that case it would be
-  //      better to give these backend tensors the same layout in the first place.
-  //      There is one more thing to consider: this operation depends on the layout of
-  //      the model. For example, if an NHWC model has this operation with output rank == 4,
-  //      indices rank == 2 and axis == 2, it gathers along the W and C axes, but W and C
-  //      are not contiguous in NCHW, so an NCHW backend cannot handle this case.
- assert(backend_layout == ifm_alloc->layout());
- assert(backend_layout == indices_alloc->layout());
- assert(ifm_rank < 4 || _current_subg_layout == backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEGatherEx>();
-
- fn->configure(ifm_alloc->handle(), indices_alloc->handle(), ofm_alloc->handle(), axis);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
- const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
- const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto gamma_alloc = _tensor_builder->at(gamma_index).get();
- auto beta_alloc = _tensor_builder->at(beta_index).get();
- auto epsilon = node.param().epsilon;
- auto activation = node.param().activation;
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), gamma_alloc->handle(),
- beta_alloc->handle(), epsilon);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::L2Normalization &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
-
-  // {CL|Neon}L2Normalization performs the reduction only along dimension 0, whereas
-  // the L2Normalization operation always performs the reduction along the depth axis.
-  // Thus, we repurpose {CL|Neon}NormalizationLayer to act as a depthwise L2
-  // normalization by choosing the normalization parameters as below.
-
- const auto &ifm_shape = _ctx.at(ifm_index).shape();
-  // TODO Support an optional constant axis to perform the normalization on
- const auto normalization_axis = node.param().rank - 1;
- int32_t radius =
- 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
-  float alpha = 1.0f;                            // Chosen so that alpha_ in the implementation becomes 1
- float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
- float bias = 0.0f; // Don't offset the reduction.
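-  // Net effect (assuming the trailing `false` below disables scaling of alpha): each
-  // element becomes x / (bias + alpha * sum(x^2))^beta == x / sqrt(sum(x^2) over the
-  // depth axis), i.e. a depthwise L2 normalization.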
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
- radius, alpha, beta, bias, false);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::L2Pool2D::Input::INPUT)};
-
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
-
- uint32_t kw = node.param().kw;
- uint32_t kh = node.param().kh;
- const auto stride = node.param().stride;
- const auto padding =
- neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- ::arm_compute::PoolingLayerInfo info{
- ::arm_compute::PoolingType::L2, ::arm_compute::Size2D{kw, kh},
- ::neurun::backend::acl_common::asPadStrideInfo(padding, stride)};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPoolingLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{
- node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
-
- auto radius = node.param().radius;
- auto alpha = node.param().alpha;
- auto beta = node.param().beta;
- auto bias = node.param().bias;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const auto norm_info = ::arm_compute::NormalizationLayerInfo(
- ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), norm_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NELogicalAnd>();
-
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalNot &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEBitwiseNot>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LogicalOr &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NELogicalOr>();
-
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Logistic &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::LSTM &node)
-{
- // TODO Support dynamic rnn
-  // TODO Fix a subtle error in the non-CIFG, non-peephole, no-projection case.
- const auto scratch_buffer_index{
- node.getOutputs().at(ir::operation::LSTM::Output::SCRATCH_BUFFER)};
- const auto output_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT_STATE_OUT)};
- const auto cell_state_out_index{
- node.getOutputs().at(ir::operation::LSTM::Output::CELL_STATE_OUT)};
- const auto output_index{node.getOutputs().at(ir::operation::LSTM::Output::OUTPUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::LSTM::Input::INPUT)};
- const auto input_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_INPUT_WEIGHTS)}; // optional
- const auto input_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_FORGET_WEIGHTS)};
- const auto input_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_CELL_WEIGHTS)};
- const auto input_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_TO_OUTPUT_WEIGHTS)};
- const auto recurrent_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_INPUT_WEIGHTS)}; // optional
- const auto recurrent_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_FORGET_WEIGHTS)};
- const auto recurrent_to_cell_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_CELL_WEIGHTS)};
- const auto recurrent_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::RECURRENT_TO_OUTPUT_WEIGHTS)};
- const auto cell_to_input_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_INPUT_WEIGHTS)}; // optional
- const auto cell_to_forget_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_FORGET_WEIGHTS)}; // optional
- const auto cell_to_output_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::CELL_TO_OUTPUT_WEIGHTS)}; // optional
- const auto input_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::INPUT_GATE_BIAS)};
- const auto forget_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::FORGET_GATE_BIAS)};
- const auto cell_bias_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_BIAS)};
- const auto output_gate_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_GATE_BIAS)};
- const auto projection_weights_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_WEIGHTS)}; // optional
- const auto projection_bias_index{
- node.getInputs().at(ir::operation::LSTM::Input::PROJECTION_BIAS)}; // optional
- const auto output_state_in_index{
- node.getInputs().at(ir::operation::LSTM::Input::OUTPUT_STATE_IN)};
- const auto cell_state_in_index{node.getInputs().at(ir::operation::LSTM::Input::CELL_STATE_IN)};
- const auto cell_threshold = node.param().cell_threshold;
- const auto projection_threshold = node.param().projection_threshold;
-
- bool has_input_to_input_weights = _ctx.at(input_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(input_to_input_weights_index).shape().dim(1) != 0;
- bool has_recurrent_to_input_weights =
- _ctx.at(recurrent_to_input_weights_index).shape().dim(0) != 0 &&
- _ctx.at(recurrent_to_input_weights_index).shape().dim(1) != 0;
- bool has_cell_to_forget_weights = _ctx.at(cell_to_forget_weights_index).shape().dim(0) != 0;
- bool has_cell_to_output_weights = _ctx.at(cell_to_output_weights_index).shape().dim(0) != 0;
- bool has_projection_weights = _ctx.at(projection_weights_index).shape().dim(0) != 0 &&
- _ctx.at(projection_weights_index).shape().dim(1) != 0;
-  bool has_projection_bias = _ctx.at(projection_bias_index).shape().dim(0) != 0;
-
-  // NOTE input_to_input_weights and recurrent_to_input_weights do not exist in CIFG.
-  // true: no CIFG
-  // false: CIFG
-  // NOTE cell_to_input_weights does not exist without peephole connections, even in a
-  //      regular (non-CIFG) LSTM.
- bool has_cifg_param = has_input_to_input_weights && has_recurrent_to_input_weights;
-
-  // NOTE cell_to_forget_weights and cell_to_output_weights exist with peephole
-  //      connections, but cell_to_input_weights does not exist in a CIFG LSTM
-  //      even with peephole.
- // true: peephole
- // false: no peephole
- bool has_peephole_param = has_cell_to_forget_weights && has_cell_to_output_weights;
-
-  // NOTE Even when the projection weights have data, the projection bias may not.
- bool has_projection_param = has_projection_weights;
-
- const auto activation = node.param().activation;
- const auto cell_clip = cell_threshold;
- const auto projection_clip = projection_threshold;
- assert(cell_clip >= 0.f && projection_clip >= 0.f);
-
- auto scratch_buffer_alloc = _tensor_builder->at(scratch_buffer_index).get();
- auto output_state_out_alloc = _tensor_builder->at(output_state_out_index).get();
- auto cell_state_out_alloc = _tensor_builder->at(cell_state_out_index).get();
- auto output_alloc = _tensor_builder->at(output_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto input_to_forget_weights_alloc = _tensor_builder->at(input_to_forget_weights_index).get();
- auto input_to_cell_weights_alloc = _tensor_builder->at(input_to_cell_weights_index).get();
- auto input_to_output_weights_alloc = _tensor_builder->at(input_to_output_weights_index).get();
- auto recurrent_to_forget_weights_alloc =
- _tensor_builder->at(recurrent_to_forget_weights_index).get();
- auto recurrent_to_cell_weights_alloc = _tensor_builder->at(recurrent_to_cell_weights_index).get();
- auto recurrent_to_output_weights_alloc =
- _tensor_builder->at(recurrent_to_output_weights_index).get();
-
- auto forget_gate_bias_alloc = _tensor_builder->at(forget_gate_bias_index).get();
- auto cell_bias_alloc = _tensor_builder->at(cell_bias_index).get();
- auto output_gate_bias_alloc = _tensor_builder->at(output_gate_bias_index).get();
- auto output_state_in_alloc = _tensor_builder->at(output_state_in_index).get();
- auto cell_state_in_alloc = _tensor_builder->at(cell_state_in_index).get();
-
- auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NELSTMLayer>();
-
- ::arm_compute::LSTMParams<::arm_compute::ITensor> lstm_params{};
- if (has_cifg_param)
- {
- auto input_to_input_weights_alloc =
- _tensor_builder->at(input_to_input_weights_index).get(); // optional
- auto recurrent_to_input_weights_alloc =
- _tensor_builder->at(recurrent_to_input_weights_index).get(); // optional
- auto cell_to_input_weights_handle =
- has_peephole_param ? _tensor_builder->at(cell_to_input_weights_index).get()->handle()
- : nullptr; // optional (non-cifg && peephole)
- auto input_gate_bias_alloc = _tensor_builder->at(input_gate_bias_index).get(); // optional
- lstm_params.set_cifg_params(input_to_input_weights_alloc->handle(),
- recurrent_to_input_weights_alloc->handle(),
- cell_to_input_weights_handle, input_gate_bias_alloc->handle());
- }
- if (has_peephole_param)
- {
- auto cell_to_forget_weights_alloc =
- _tensor_builder->at(cell_to_forget_weights_index).get(); // optional
- auto cell_to_output_weights_alloc =
- _tensor_builder->at(cell_to_output_weights_index).get(); // optional
- lstm_params.set_peephole_params(cell_to_forget_weights_alloc->handle(),
- cell_to_output_weights_alloc->handle());
- }
- if (has_projection_param)
- {
- auto projection_weights_alloc = _tensor_builder->at(projection_weights_index).get(); // optional
- auto projection_bias_handle = has_projection_bias
- ? _tensor_builder->at(projection_bias_index).get()->handle()
- : nullptr; // optional
- lstm_params.set_projection_params(projection_weights_alloc->handle(), projection_bias_handle);
- }
-
- fn->configure(
- input_alloc->handle(), input_to_forget_weights_alloc->handle(),
- input_to_cell_weights_alloc->handle(), input_to_output_weights_alloc->handle(),
- recurrent_to_forget_weights_alloc->handle(), recurrent_to_cell_weights_alloc->handle(),
- recurrent_to_output_weights_alloc->handle(), forget_gate_bias_alloc->handle(),
- cell_bias_alloc->handle(), output_gate_bias_alloc->handle(), output_state_in_alloc->handle(),
- cell_state_in_alloc->handle(), scratch_buffer_alloc->handle(),
- output_state_out_alloc->handle(), cell_state_out_alloc->handle(), output_alloc->handle(),
- lstm_params, act_info, cell_clip, projection_clip);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Mul &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPixelWiseMultiplication>();
-
-  // For scale == 1.0, the only allowed RoundingPolicy is TO_ZERO
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(), 1.0, // scale
- arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Neg &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NENegLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Pack &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- auto axis{node.param().axis};
-
- const auto output_rank = node.param().rank;
-
- std::vector<ir::OperandIndex> input_indexes;
- for (const auto &input_index : node.getInputs())
- input_indexes.emplace_back(input_index);
-
- auto output = _tensor_builder->at(output_index).get()->handle();
- std::vector<arm_compute::ITensor *> inputs;
- for (const auto &input_index : input_indexes)
- inputs.emplace_back(_tensor_builder->at(input_index)->handle());
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
-
- if (axis < 0)
- axis += output_rank;
- axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEStackLayer>();
-
- fn->configure(inputs, axis, output);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Pad &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
- const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
- const auto output_index{node.getOutputs().at(0)};
- assert(_ctx.at(pad_index).isConstant());
-
- auto rank = node.param().rank;
- auto pad_base = _ctx.at(pad_index).data().base();
-
- auto input = _tensor_builder->at(input_index).get()->handle();
- auto output = _tensor_builder->at(output_index).get()->handle();
-
- ::arm_compute::PaddingList padding_list;
- padding_list.resize(rank);
- for (int32_t n = 0; n < rank; ++n)
- {
- const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
- const auto axis =
- acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
- padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
- }
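-  // e.g. (illustrative) for rank == 2 with constant pad data {1, 1, 2, 2}, row n == 0
-  // yields PaddingInfo{1, 1} and row n == 1 yields PaddingInfo{2, 2}, each stored at
-  // its layout-converted axis.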
-
- const auto input_type = _ctx.at(input_index).typeInfo();
- UNUSED_RELEASE(input_type);
- assert(input->info()->data_type() == acl_common::asDataType(input_type.type()));
- assert(input->info()->quantization_info() ==
- ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()));
- const auto pixel_value =
- ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPadLayer>();
- fn->configure(input, output, padding_list, pixel_value);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Permute &node)
-{
- const auto ofm_idx{node.getOutputs().at(0)};
- const auto ifm_idx{node.getInputs().at(0)};
- const auto permute_type = node.getPermuteType();
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
- const auto rank = _ctx.at(ofm_idx).shape().rank();
- assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
-
- std::unique_ptr<::arm_compute::IFunction> fn;
- arm_compute::PermutationVector pv;
- if (permute_type == ir::operation::Permute::Type::NCHW_TO_NHWC && rank == 4)
- {
- // WHCN -> CWHN
- pv = arm_compute::PermutationVector{2, 0, 1};
-
- auto l = nnfw::cpp14::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
-
- fn = std::move(l);
- }
- else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
- {
- // CWHN -> WHCN
- pv = arm_compute::PermutationVector{1, 2, 0};
-
- auto l = nnfw::cpp14::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), pv);
-
- fn = std::move(l);
- }
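-  // Remaining cases (rank != 4 or no layout change) are handled as a plain copy.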
- else
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::NECopy>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- fn = std::move(l);
- }
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::PReLU &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
- const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto alpha_alloc = _tensor_builder->at(alpha_index).get();
-
-  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPReLU>();
-
-  fn->configure(ifm_alloc->handle(), alpha_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReduceMax &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReduceMax::Input::INPUT)};
- const auto &axes{node.param().axes};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int ifm_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += ifm_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
- }
-
- arm_compute::Coordinates reduce_axes;
- for (const auto axis : acl_axes)
- {
- reduce_axes.set(reduce_axes.num_dimensions(), axis);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceOperation>();
-
- fn->configure(ifm_alloc->handle(), reduce_axes, false, ofm_alloc->handle(),
- ::arm_compute::ReduceOperation::MAX);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReduceMin &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReduceMin::Input::INPUT)};
- const auto &axes{node.param().axes};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int ifm_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += ifm_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value());
- }
-
- arm_compute::Coordinates reduce_axes;
- for (const auto axis : acl_axes)
- {
- reduce_axes.set(reduce_axes.num_dimensions(), axis);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceOperation>();
-
- fn->configure(ifm_alloc->handle(), reduce_axes, false, ofm_alloc->handle(),
- ::arm_compute::ReduceOperation::MIN);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReduceSum &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReduceSum::Input::INPUT)};
- const auto &axes{node.param().axes};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = input_alloc->layout();
-
- // Convert to ACL axes taking into account negative values and possible duplicates.
- std::set<std::uint32_t> acl_axes;
- const int input_rank = node.param().rank;
- for (int axis : axes)
- {
- if (axis < 0)
- axis += input_rank;
- acl_axes.insert(
- acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value());
- }
-
- arm_compute::Coordinates fixed_axes;
- for (const auto axis : acl_axes)
- {
- fixed_axes.set(fixed_axes.num_dimensions(), axis);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReduceSum>();
-
- fn->configure(input_alloc->handle(), fixed_axes, false, output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::NEActivationLayer>();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU1 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ReLU6 &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Reshape &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
-  // NOTE This operation must not change the layout from frontend to backend,
-  //      so PermutationOperationPass makes the frontend and backend layouts the same.
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = output_alloc->layout();
- assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
- frontend_layout == backend_layout);
- UNUSED_RELEASE(frontend_layout);
- UNUSED_RELEASE(backend_layout);
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::NEReshapeLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
-
- const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEScale>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(),
- ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
- ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::RNN &node)
-{
- const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
- const auto hidden_state_out_index{
- node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
-
- const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
- const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
- const auto recurrent_weights_index{
- node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
- const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
- const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
-
- const auto activation = node.param().activation;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto hidden_state_out_alloc = _tensor_builder->at(hidden_state_out_index).get();
-
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto weights_alloc = _tensor_builder->at(weights_index).get();
- auto recurrent_weights_alloc = _tensor_builder->at(recurrent_weights_index).get();
- auto bias_alloc = _tensor_builder->at(bias_index).get();
- auto hidden_state_in_alloc = _tensor_builder->at(hidden_state_in_index).get();
- auto act_info = ::neurun::backend::acl_common::asActivationLayerInfo(activation);
-
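- // Seed hidden_state_out with a copy of hidden_state_in so the RNN layer can read and
- // update the hidden state through a single tensor.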
- auto copy_layer = nnfw::cpp14::make_unique<::arm_compute::NECopy>();
- copy_layer->configure(hidden_state_in_alloc->handle(), hidden_state_out_alloc->handle());
- _execution_builder->append(asAclFunction(std::move(copy_layer)));
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NERNNLayerEx>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
- fn->configure(input_alloc->handle(), weights_alloc->handle(), recurrent_weights_alloc->handle(),
- bias_alloc->handle(), hidden_state_out_alloc->handle(), output_alloc->handle(),
- act_info);
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::RSQRT &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NERsqrtLayer>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Squeeze &node)
-{
- // Squeeze is identical to Reshape except that it has an optional dimensions input.
- // The optional dims input is ignored here because the output tensor already has the
- // squeezed shape produced by the freezer and toco.
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
- const auto dims{node.param().dims};
- const auto ndim{node.param().ndim};
- UNUSED_RELEASE(dims);
- UNUSED_RELEASE(ndim);
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
- auto fn = nnfw::cpp14::make_unique<arm_compute::NEReshapeLayer>();
- fn->configure(input_alloc->handle(), output_alloc->handle());
- auto acl_fn = asAclFunction(std::move(fn));
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Tanh &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<arm_compute::NEActivationLayer>();
-
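- // TANH with a=1.0f, b=1.0f: f(x) = a * tanh(b * x) = tanh(x)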
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Softmax &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
- const auto beta = node.param().beta;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESoftmaxLayer>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
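- // beta scales the input before exponentiation: softmax(x)_i = exp(beta * x_i) / sum_j exp(beta * x_j)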
- fn->configure(input_alloc->handle(), output_alloc->handle(), beta);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
- const auto block_size_index{
- node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
- const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto block_size_alloc = _tensor_builder->at(block_size_index).get();
- auto paddings_alloc = _tensor_builder->at(paddings_index).get();
-
- assert(_ctx.at(block_size_index).isConstant());
- assert(_ctx.at(paddings_index).isConstant());
-
- // NESpaceToBatchLayer has a bug: it pads with the value 0 even when the zero point of a
- // QASYMM8 tensor is not 0, so the Ex variant is used instead.
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESpaceToBatchLayerEx>();
-
- fn->configure(ifm_alloc->handle(), block_size_alloc->handle(), paddings_alloc->handle(),
- ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
-
- auto block_size = node.param().block_size;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESpaceToDepthLayerEx>();
-
- fn->configure(ifm_alloc->handle(), ofm_alloc->handle(), block_size);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Split &node)
-{
- // TODO Support this op using SubTensor
- const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
-
- assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
-
- const auto ifm_rank = node.param().rank;
- std::vector<ir::OperandIndex> output_indexes;
- for (const auto &output : node.getOutputs())
- output_indexes.emplace_back(output);
-
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- std::vector<arm_compute::ITensor *> output_allocs;
- for (const auto &ofm_ind : output_indexes)
- output_allocs.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
- auto axis = node.param().axis;
- if (axis < 0)
- axis += ifm_rank;
- axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
-
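- // NESplit splits the input into num_splits equal parts along the converted axis.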
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESplit>();
-
- fn->configure(ifm_alloc->handle(), output_allocs, axis);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::SQRT &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- const ::arm_compute::ActivationLayerInfo act_info{
- ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle(), act_info);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseSquaredDiff>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Sub &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
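- // ConvertPolicy::SATURATE clamps out-of-range results instead of wrapping around.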
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticSubtraction>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Slice &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
- const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
- const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
-
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = inputData_alloc->layout();
-
- // Initialize the start/end indices, reordering them to the axis order of inputData
- int input_rank = node.param().rank;
- std::vector<int32_t> starts;
- std::vector<int32_t> ends;
- starts.resize(input_rank, 0);
- ends.resize(input_rank, 0);
- {
- auto beginData_base = _ctx.at(begins_index).data().base();
- auto sizeData_base = _ctx.at(sizes_index).data().base();
- const int beginData_size = _ctx.at(begins_index).shape().num_elements();
- const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
-
- using ir::DataType;
-
- UNUSED_RELEASE(beginData_size);
- UNUSED_RELEASE(sizeData_size);
-
- assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
- assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
- assert(beginData_size == input_rank);
- assert(sizeData_size == input_rank);
-
- assert(beginData_base != nullptr);
- for (int n = 0; n < input_rank; ++n)
- {
- auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
- backend_layout)
- .value();
-
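- // NESlice takes end coordinates rather than sizes, so convert each (begin, size)
- // pair to (start, end).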
- int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
- starts[axis] = begin_value;
-
- int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
- ends[axis] = begin_value + size_value;
- }
- }
-
- ::arm_compute::Coordinates starts_set;
- ::arm_compute::Coordinates ends_set;
-
- for (size_t i = 0; i < starts.size(); ++i)
- {
- starts_set.set(i, starts[i]);
- ends_set.set(i, ends[i]);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NESlice>();
-
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::StridedSlice &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
- const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
- const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
- const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
-
- auto outputData_alloc = _tensor_builder->at(output_index).get();
- auto inputData_alloc = _tensor_builder->at(input_index).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = inputData_alloc->layout();
-
- // Initialize the start/end/stride indices, reordering them to the axis order of inputData
- int input_rank = node.param().rank;
- std::vector<int32_t> starts;
- std::vector<int32_t> ends;
- std::vector<int32_t> strides;
- starts.resize(input_rank, 0);
- ends.resize(input_rank, 0);
- strides.resize(input_rank, 0);
- {
- auto startData_base = _ctx.at(starts_index).data().base();
- auto endData_base = _ctx.at(ends_index).data().base();
- auto stridesData_base = _ctx.at(strides_index).data().base();
- const int startData_size = _ctx.at(starts_index).shape().num_elements();
- const int endData_size = _ctx.at(ends_index).shape().num_elements();
- const int stridesData_size = _ctx.at(strides_index).shape().num_elements();
-
- using ir::DataType;
-
- UNUSED_RELEASE(startData_size);
- UNUSED_RELEASE(endData_size);
- UNUSED_RELEASE(stridesData_size);
-
- assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
- assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
- assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
- assert(startData_size == input_rank);
- assert(endData_size == input_rank);
- assert(stridesData_size == input_rank);
-
- assert(startData_base != nullptr);
- for (int n = 0; n < input_rank; ++n)
- {
- auto axis = ::neurun::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
- backend_layout)
- .value();
-
- int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
- starts[axis] = start_value;
-
- int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
- ends[axis] = end_value;
-
- int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
- strides[axis] = strides_value;
- }
- }
-
- // Reorder each mask's bits to match the axis order of inputData
- // FIXME Take the layouts into account.
- const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
- const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
- const auto shrink_axis_mask =
- acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
-
- ::arm_compute::Coordinates starts_set;
- ::arm_compute::Coordinates ends_set;
- ::arm_compute::BiStrides strides_set;
-
- for (size_t i = 0; i < starts.size(); ++i)
- {
- starts_set.set(i, starts[i]);
- ends_set.set(i, ends[i]);
- strides_set.set(i, strides[i]);
- }
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEStridedSlice>();
-
- fn->configure(inputData_alloc->handle(), outputData_alloc->handle(), starts_set, ends_set,
- strides_set, begin_mask, end_mask, shrink_axis_mask);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::TransposeConv &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto output_shape_index{
- node.getInputs().at(ir::operation::TransposeConv::Input::OUTPUT_SHAPE)};
- const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
- const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
-
- const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
- const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
- const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_subg_layout);
-
- const auto stride = node.param().stride;
-
- assert((node.param().padding.type == ir::PaddingType::SAME) ||
- (node.param().padding.type == ir::PaddingType::VALID));
- auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
- ker_shape.W, ker_shape.H);
-
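- // With VALID padding the computed output can be larger than the requested shape; the
- // excess columns/rows are passed to NETransposeConvLayer as an invalid border to exclude.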
- uint32_t invalid_horizontal = 0;
- uint32_t invalid_vertical = 0;
- if (node.param().padding.type == ir::PaddingType::VALID)
- {
- invalid_horizontal =
- ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
- invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
- }
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto ifm_alloc = _tensor_builder->at(ifm_index).get();
- auto ker_alloc = _tensor_builder->at(ker_index).get();
-
- const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NETransposeConvLayer>();
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
- invalid_horizontal, invalid_vertical);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Transpose &node)
-{
- const auto ofm_idx{node.getOutputs().at(0)};
- const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
- const auto &perm{node.param().perm};
-
- auto ofm_alloc = _tensor_builder->at(ofm_idx).get();
- const auto ifm_alloc = _tensor_builder->at(ifm_idx).get();
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = ifm_alloc->layout();
-
- const auto rank = node.param().rank;
- std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
- auto backend_pv = ::neurun::backend::acl_common::getARMComputePermutationVector(
- rank, pv, frontend_layout, backend_layout);
-
- std::unique_ptr<::arm_compute::IFunction> fn;
-
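- // NETranspose only handles 2D matrix transposition; higher-rank tensors fall back to
- // NEPermute with the converted permutation vector.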
- if (ifm_alloc->num_dimensions() <= 2 && ofm_alloc->num_dimensions() <= 2)
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::NETranspose>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle());
-
- fn = std::move(l);
- }
- else
- {
- auto l = nnfw::cpp14::make_unique<::arm_compute::NEPermute>();
-
- l->configure(ifm_alloc->handle(), ofm_alloc->handle(), backend_pv);
-
- fn = std::move(l);
- }
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Unpack &node)
-{
- const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
- auto axis{node.param().axis};
-
- const auto input_rank = node.param().rank;
-
- std::vector<ir::OperandIndex> output_indexes;
- for (const auto &output_index : node.getOutputs())
- output_indexes.emplace_back(output_index);
-
- auto input = _tensor_builder->at(input_index).get()->handle();
- std::vector<arm_compute::ITensor *> outputs;
- for (const auto &output_index : output_indexes)
- outputs.emplace_back(_tensor_builder->at(output_index)->handle());
-
- const auto frontend_layout = _current_subg_layout;
- const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
- if (axis < 0)
- axis += input_rank;
- axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
-
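- // NEUnstack slices the input into the output tensors along the converted axis.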
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEUnstack>();
-
- fn->configure(input, outputs, axis);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
-}
-
-void KernelGenerator::visit(const ir::operation::Add &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticAddition>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle(),
- arm_compute::ConvertPolicy::SATURATE);
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Div &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
-
- const auto activation = node.param().activation;
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseDivision>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-
- ActivationBuilder{*_execution_builder}.append(activation, ofm_alloc->handle());
-}
-
-void KernelGenerator::visit(const ir::operation::Exp &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input_alloc = _tensor_builder->at(input_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEExpLayer>();
-
- fn->configure(input_alloc->handle(), output_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Comparison &node)
-{
- const auto output_index{node.getOutputs().at(0)};
- const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
- const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
-
- const auto comparison_type = node.param().comparison_type;
-
- auto output_alloc = _tensor_builder->at(output_index).get();
- auto input0_alloc = _tensor_builder->at(input0_index).get();
- auto input1_alloc = _tensor_builder->at(input1_index).get();
-
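- // Note: the cast below assumes the IR's comparison_type enumerators are numerically
- // identical to arm_compute::ComparisonOperation.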
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseComparison>();
-
- fn->configure(input0_alloc->handle(), input1_alloc->handle(), output_alloc->handle(),
-               static_cast<arm_compute::ComparisonOperation>(comparison_type));
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Min &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseMin>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-void KernelGenerator::visit(const ir::operation::Max &node)
-{
- const auto ofm_index{node.getOutputs().at(0)};
- const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
- const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
-
- auto ofm_alloc = _tensor_builder->at(ofm_index).get();
- auto lhs_alloc = _tensor_builder->at(lhs_index).get();
- auto rhs_alloc = _tensor_builder->at(rhs_index).get();
-
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEElementwiseMax>();
-
- fn->configure(lhs_alloc->handle(), rhs_alloc->handle(), ofm_alloc->handle());
-
- auto acl_fn = asAclFunction(std::move(fn));
-
- _execution_builder->append(std::move(acl_fn));
-}
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace neurun