Diffstat (limited to 'runtime/onert/backend/cpu/KernelGenerator.cc')
-rw-r--r--   runtime/onert/backend/cpu/KernelGenerator.cc   932
1 file changed, 932 insertions, 0 deletions
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
new file mode 100644
index 000000000..86764dd06
--- /dev/null
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -0,0 +1,932 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "kernel/AbsLayer.h"
+#include "kernel/AddLayer.h"
+#include "kernel/AvgPoolLayer.h"
+#include "kernel/CastLayer.h"
+#include "kernel/CompareLayer.h"
+#include "kernel/ConcatLayer.h"
+#include "kernel/ConvolutionLayer.h"
+#include "kernel/DepthwiseConvolutionLayer.h"
+#include "kernel/DivLayer.h"
+#include "kernel/ExpLayer.h"
+#include "kernel/FullyConnectedLayer.h"
+#include "kernel/GatherLayer.h"
+#include "kernel/LogisticLayer.h"
+#include "kernel/MaxLayer.h"
+#include "kernel/MaxPoolLayer.h"
+#include "kernel/MinLayer.h"
+#include "kernel/MulLayer.h"
+#include "kernel/OneHotLayer.h"
+#include "kernel/OperationUtils.h"
+#include "kernel/PackLayer.h"
+#include "kernel/PadLayer.h"
+#include "kernel/PermuteLayer.h"
+#include "kernel/ReduceLayer.h"
+#include "kernel/ReshapeLayer.h"
+#include "kernel/RsqrtLayer.h"
+#include "kernel/ShapeLayer.h"
+#include "kernel/SinLayer.h"
+#include "kernel/SliceLayer.h"
+#include "kernel/SoftMaxLayer.h"
+#include "kernel/StridedSliceLayer.h"
+#include "kernel/SplitLayer.h"
+#include "kernel/SubLayer.h"
+#include "kernel/TanhLayer.h"
+#include "kernel/TransposeLayer.h"
+#include "kernel/UnpackLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+KernelGenerator::KernelGenerator(
+    const ir::Operands &operand_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder,
+    const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder)
+    : _ctx(operand_ctx), _tensor_builder(tensor_builder), _kernel_builder(kernel_builder),
+      _current_op_seq_layout(ir::Layout::UNKNOWN)
+{
+  // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+  // TODO Move this to IKernelGenerator
+  //      (all derivatives have the same implementation for this)
+  assert(!_return_fn_seq);
+  _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+  _current_op_seq_layout = op_seq.getLayout();
+  for (const auto &e : op_seq.operations())
+  {
+    const auto &node = *(e.node);
+    node.accept(*this);
+    _return_fn_seq->append(releaseFunction());
+
+    // NOTE Permute node has tensors of the other backends
+    if (node.opcode() != ir::OpCode::Permute)
+    {
+      for (const auto &ind : node.getInputs() + node.getOutputs())
+      {
+        auto tensor = _tensor_builder->at(ind);
+        if (tensor)
+        {
+          tensor->increase_ref();
+        }
+      }
+    }
+  }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+  using ir::operation::Conv2D;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+  const auto padding_type = node.param().padding.type;
+  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
+                                            ker_width, ker_height);
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto ker_alloc = _tensor_builder->at(ker_index).get();
+  auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ConvolutionLayer>();
+
+  fn->configure(ifm_alloc, ker_alloc, bias_alloc, padding_type, padding.left, padding.right,
+                padding.top, padding.bottom, stride.horizontal, stride.vertical, activation,
+                ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+  using ir::operation::DepthwiseConv2D;
+
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  // Kernel format is [1, kernel_height, kernel_width, depth_out].
+  const auto &ker_shape = _ctx.at(ker_index).shape();
+  const auto ker_height = ker_shape.dim(1);
+  const auto ker_width = ker_shape.dim(2);
+  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
+                                            ker_width, ker_height);
+  const auto multiplier = node.param().multiplier;
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto ker_alloc = _tensor_builder->at(ker_index).get();
+  auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::DepthwiseConvolutionLayer>();
+
+  fn->configure(ifm_alloc, ker_alloc, bias_alloc, padding.left, padding.right, padding.top,
+                padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
+                ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
+
+  const auto kh = node.param().kh;
+  const auto kw = node.param().kw;
+
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto padding =
+      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::MaxPoolLayer>();
+
+  fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
+                stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
+
+  const auto kh = node.param().kh;
+  const auto kw = node.param().kw;
+  const auto stride = node.param().stride;
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+  const auto padding =
+      ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::AvgPoolLayer>();
+
+  fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
+                stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Concat &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+
+  const auto rank = _ctx.at(ofm_index).shape().rank();
+  const auto axis =
+      ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+
+  auto output_alloc = _tensor_builder->at(ofm_index).get();
+
+  std::vector<const operand::Tensor *> input_tensors;
+  for (auto &ifm_idx : node.getInputs())
+    input_tensors.emplace_back(_tensor_builder->at(ifm_idx).get());
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ConcatLayer>();
+
+  fn->configure(input_tensors, axis, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+  using ir::operation::FullyConnected;
+
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+  const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+  const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+  const auto activation = node.param().activation;
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto weight_alloc = _tensor_builder->at(weight_index).get();
+  auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::FullyConnectedLayer>();
+
+  fn->configure(input_alloc, weight_alloc, bias_alloc, activation, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Reshape &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  // optional 2nd input
+  operand::Tensor *shape_alloc = nullptr;
+
+  if (node.getInputs().size() == 2)
+  {
+    const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
+    shape_alloc = _tensor_builder->at(shape_index).get();
+  }
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ReshapeLayer>();
+
+  fn->configure(input_alloc, shape_alloc, output_alloc);
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Squeeze &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  // Squeeze can share the same kernel with Reshape
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ReshapeLayer>();
+
+  fn->configure(input_alloc, nullptr, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Softmax &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
+
+  const auto beta = node.param().beta;
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::SoftMaxLayer>();
+
+  fn->configure(input_alloc, beta, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Add &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::AddLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Comparison &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto comparison_type = node.param().comparison_type;
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::CompareLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, comparison_type, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Gather &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto indices_alloc = _tensor_builder->at(indices_index).get();
+
+  const auto backend_layout = output_alloc->layout();
+  UNUSED_RELEASE(backend_layout);
+
+  // NOTE The frontend layout and backend layout must be the same for this operation.
+  //      If not the same, we have to add a stage(?) to perform permutation of output tensor. It
+  //      is not efficient even if it works well. If so, it would be better to set the
+  //      layout of these backend tensors to the same layout.
+  //      There is also one thing we have to think about. This operation depends on the layout of
+  //      a model. For example, if a model in NHWC has this operation as output rank == 4, indices
+  //      rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
+  //      and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
+  assert(backend_layout == input_alloc->layout());
+  assert(backend_layout == indices_alloc->layout());
+  const auto &input_shape = _ctx.at(input_index).shape();
+  UNUSED_RELEASE(input_shape);
+  assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
+
+  const auto axis_raw = node.param().axis;
+  const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::GatherLayer>();
+
+  fn->configure(input_alloc, indices_alloc, output_alloc, axis_value);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Sub &node)
+{
+  // The same as Add
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::SubLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Mul &node)
+{
+  // The same as Add
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::MulLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::OneHot &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
+
+  const auto depth = node.param().depth;
+  const auto on_value = node.param().on_value;
+  const auto off_value = node.param().off_value;
+  const auto axis = node.param().axis;
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto indices_alloc = _tensor_builder->at(indices_index).get();
+
+  assert(indices_alloc->data_type() == OperandType::INT32);
+  assert(axis <= static_cast<int>(indices_alloc->num_dimensions()));
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::OneHotLayer>();
+
+  fn->configure(indices_alloc, output_alloc, depth, on_value, off_value, axis);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Div &node)
+{
+  // The same as Add
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
+
+  const auto activation = node.param().activation;
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::DivLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Permute &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(0)};
+
+  const auto &shape = _ctx.at(output_index).shape();
+  const auto input_backend_ctx = node.param().input_backend_ctx;
+  const auto output_backend_ctx = node.param().output_backend_ctx;
+  const auto data_type = node.getDataType();
+
+  auto output_tensor = output_backend_ctx->tensor_builder->tensorAt(output_index);
+  auto input_tensor = input_backend_ctx->tensor_builder->tensorAt(input_index);
+  assert(output_tensor != nullptr);
+  assert(input_tensor != nullptr);
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::PermuteLayer>();
+
+  // TODO Support NCHW frontend
+  auto out_shape = shape;
+  if (shape.rank() == 4 && output_tensor->layout() == ir::Layout::NCHW)
+  {
+    out_shape.dim(1) = shape.dim(3);
+    out_shape.dim(2) = shape.dim(1);
+    out_shape.dim(3) = shape.dim(2);
+  }
+
+  const auto permute_type = node.getPermuteType();
+  // Check Permutation Type
+  const auto inferPermuteType = [&]() {
+    if (input_tensor->layout() == ir::Layout::NHWC && output_tensor->layout() == ir::Layout::NCHW)
+    {
+      return ir::operation::Permute::Type::NHWC_TO_NCHW;
+    }
+    else if (input_tensor->layout() == ir::Layout::NCHW &&
+             output_tensor->layout() == ir::Layout::NHWC)
+    {
+      return ir::operation::Permute::Type::NCHW_TO_NHWC;
+    }
+    else
+    {
+      return ir::operation::Permute::Type::COPY;
+    }
+  }();
+  UNUSED_RELEASE(inferPermuteType);
+  assert(permute_type == inferPermuteType);
+
+  fn->configure(input_tensor, output_tensor, out_shape, permute_type, data_type);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Custom &node)
+{
+  auto get_type_info = [](const ir::Operand &operand) -> custom::TypeInfo {
+    const auto &frontend_shape = operand.shape();
+    custom::Shape shape(frontend_shape.rank());
+    for (auto d = 0; d < frontend_shape.rank(); ++d)
+    {
+      shape.dim(d) = frontend_shape.dim(d);
+    }
+
+    return {shape, operand.typeInfo().type()};
+  };
+
+  auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
+                          std::vector<custom::TypeInfo> &types, std::vector<void *> &allocs) {
+    for (auto &idx : opSeq)
+    {
+      const auto &operand = _ctx.at(idx);
+      // TODO make sure using `_current_op_seq_layout` is correct for custom operations
+      types.emplace_back(get_type_info(operand));
+      auto in_alloc = _tensor_builder->at(idx)->buffer();
+      allocs.emplace_back(in_alloc);
+    }
+  };
+
+  backend::custom::CustomKernelConfigParams params{};
+
+  fill_op_info(node.getInputs(), params.input_types, params.input_allocations);
+  fill_op_info(node.getOutputs(), params.output_types, params.output_allocations);
+
+  params.userdata = node.userdata().data;
+  params.userdata_size = node.userdata().size;
+
+  auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Exp &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ExpLayer>();
+
+  fn->configure(input_alloc, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Logistic &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::LogisticLayer>();
+
+  fn->configure(input_alloc, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Tanh &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::TanhLayer>();
+
+  fn->configure(input_alloc, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Pack &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+
+  const auto rank = node.param().rank;
+  const auto axis =
+      ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+
+  assert(-rank <= axis && axis < rank);
+
+  auto output_alloc = _tensor_builder->at(ofm_index).get();
+
+  std::vector<const operand::Tensor *> input_tensors;
+  for (auto &ifm_idx : node.getInputs())
+    input_tensors.emplace_back(_tensor_builder->at(ifm_idx).get());
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::PackLayer>();
+
+  fn->configure(input_tensors, axis, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Unpack &node)
+{
+  const auto input_index{node.getInputs().at(0)};
+
+  const auto rank = node.param().rank;
+  const auto axis =
+      ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+
+  assert(-rank <= axis && axis < rank);
+
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  std::vector<operand::Tensor *> output_tensors;
+  for (auto &output_idx : node.getOutputs())
+    output_tensors.emplace_back(_tensor_builder->at(output_idx).get());
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::UnpackLayer>();
+
+  uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
+
+  fn->configure(input_alloc, axis_resolved, node.param().num, output_tensors);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Pad &node)
+{
+  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
+  const auto output_index{node.getOutputs().at(0)};
+  assert(_ctx.at(pad_index).data());
+
+  auto input = _tensor_builder->at(input_index).get();
+  auto output = _tensor_builder->at(output_index).get();
+  auto pad_rank = _ctx.at(pad_index).shape().dim(0);
+  auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::PadLayer>();
+
+  fn->configure(input, output, pad_base, pad_rank);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Max &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::MaxLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Min &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
+  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+  auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::MinLayer>();
+
+  fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Cast &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::CastLayer>();
+
+  fn->configure(ifm_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Transpose &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(0)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto rank = node.param().rank;
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::TransposeLayer>();
+
+  fn->configure(input_alloc, output_alloc, node.param().perm, rank);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReduceSum &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(0)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<kernel::ReduceLayer>();
+
+  fn->configure(input_alloc, output_alloc, kernel::ReduceType::kSum, node.param().axes,
+                node.param().keep_dims);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReduceMax &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(0)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<kernel::ReduceLayer>();
+
+  fn->configure(input_alloc, output_alloc, kernel::ReduceType::kMax, node.param().axes,
+                node.param().keep_dims);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReduceMin &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(0)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+
+  auto fn = std::make_unique<kernel::ReduceLayer>();
+
+  fn->configure(input_alloc, output_alloc, kernel::ReduceType::kMin, node.param().axes,
+                node.param().keep_dims);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Slice &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
+  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
+  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto begins_alloc = _tensor_builder->at(begins_index).get();
+  auto sizes_alloc = _tensor_builder->at(sizes_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::SliceLayer>();
+
+  fn->configure(input_alloc, begins_alloc, sizes_alloc, output_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::StridedSlice &node)
+{
+  const auto output_index{node.getOutputs().at(0)};
+  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
+  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
+  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
+
+  auto output_alloc = _tensor_builder->at(output_index).get();
+  auto input_alloc = _tensor_builder->at(input_index).get();
+  auto starts_alloc = _tensor_builder->at(starts_index).get();
+  auto ends_alloc = _tensor_builder->at(ends_index).get();
+  auto strides_alloc = _tensor_builder->at(strides_index).get();
+
+  auto begin_mask = node.param().begin_mask;
+  auto end_mask = node.param().end_mask;
+  auto shrink_axis_mask = node.param().shrink_axis_mask;
+  auto rank = node.param().rank;
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::StridedSliceLayer>();
+
+  fn->configure(input_alloc, starts_alloc, ends_alloc, strides_alloc, output_alloc, begin_mask,
+                end_mask, shrink_axis_mask, rank);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Split &node)
+{
+  const auto num_splits = node.param().num_splits;
+  assert(num_splits == static_cast<int>(node.getOutputs().size()));
+
+  const auto rank = node.param().rank;
+  const auto axis =
+      ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+  auto axis_resolved = axis < 0 ? axis + rank : axis;
+  assert(0 <= axis_resolved && axis_resolved < rank);
+
+  const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+  auto in_tensor = _tensor_builder->at(input_idx).get();
+
+  std::vector<operand::Tensor *> out_tensors;
+  for (auto &output_idx : node.getOutputs())
+    out_tensors.emplace_back(_tensor_builder->at(output_idx).get());
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::SplitLayer>();
+
+  fn->configure(in_tensor, num_splits, axis_resolved, out_tensors);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Abs &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::AbsLayer>();
+
+  fn->configure(ifm_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Sin &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::SinLayer>();
+
+  fn->configure(ifm_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::RSQRT &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::RsqrtLayer>();
+
+  fn->configure(ifm_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Shape &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+  auto fn = std::make_unique<::onert::backend::cpu::kernel::ShapeLayer>();
+
+  fn->configure(ifm_alloc, ofm_alloc);
+
+  _return_fn = std::move(fn);
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert
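Editor's note: every visit() in the diff above follows the same control flow: look up the operand tensors, build one kernel layer, configure() it, and move it into _return_fn; visit(ir::OpSequence) then releases each configured kernel and appends it to an exec::FunctionSequence that runs them in order. The following is a minimal standalone sketch of that pattern only; all names here (MiniKernelGenerator, AddKernel, etc.) are hypothetical stand-ins, not onert's real API.

// Hypothetical sketch of the visit()/releaseFunction()/FunctionSequence pattern.
#include <cassert>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

struct IFunction // stand-in for onert's runnable-kernel interface
{
  virtual ~IFunction() = default;
  virtual void run() = 0;
};

struct AddKernel : IFunction
{
  float lhs = 0.f, rhs = 0.f;
  float *out = nullptr;
  void configure(float l, float r, float *o) { lhs = l; rhs = r; out = o; }
  void run() override { *out = lhs + rhs; }
};

struct MulKernel : IFunction
{
  float lhs = 0.f, rhs = 0.f;
  float *out = nullptr;
  void configure(float l, float r, float *o) { lhs = l; rhs = r; out = o; }
  void run() override { *out = lhs * rhs; }
};

struct FunctionSequence : IFunction // stand-in for exec::FunctionSequence
{
  std::vector<std::unique_ptr<IFunction>> fns;
  void append(std::unique_ptr<IFunction> fn) { fns.push_back(std::move(fn)); }
  void run() override
  {
    for (auto &f : fns)
      f->run();
  }
};

class MiniKernelGenerator // hypothetical; mirrors the visit() methods above
{
public:
  void visitAdd(float l, float r, float *o)
  {
    auto fn = std::make_unique<AddKernel>();
    fn->configure(l, r, o);
    _return_fn = std::move(fn); // same "park the configured kernel" step
  }
  void visitMul(float l, float r, float *o)
  {
    auto fn = std::make_unique<MulKernel>();
    fn->configure(l, r, o);
    _return_fn = std::move(fn);
  }
  std::unique_ptr<IFunction> releaseFunction()
  {
    assert(_return_fn); // a visit() must have produced a kernel first
    return std::move(_return_fn);
  }

private:
  std::unique_ptr<IFunction> _return_fn;
};

int main()
{
  float t0 = 0.f, t1 = 0.f;
  MiniKernelGenerator gen;
  FunctionSequence seq;

  gen.visitAdd(2.f, 3.f, &t0); // configure first kernel
  seq.append(gen.releaseFunction());
  gen.visitMul(4.f, 5.f, &t1); // configure second kernel
  seq.append(gen.releaseFunction());

  seq.run();                             // executes kernels in append order
  std::cout << t0 << ' ' << t1 << '\n';  // prints: 5 20
}

This separation (configure once at generation time, run many times at execution time) is why each visit() ends with _return_fn = std::move(fn) rather than running the kernel directly.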