Diffstat (limited to 'runtime/onert/backend/cpu/KernelGenerator.cc')
-rw-r--r--  runtime/onert/backend/cpu/KernelGenerator.cc | 932
1 file changed, 932 insertions(+), 0 deletions(-)
diff --git a/runtime/onert/backend/cpu/KernelGenerator.cc b/runtime/onert/backend/cpu/KernelGenerator.cc
new file mode 100644
index 000000000..86764dd06
--- /dev/null
+++ b/runtime/onert/backend/cpu/KernelGenerator.cc
@@ -0,0 +1,932 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "kernel/AbsLayer.h"
+#include "kernel/AddLayer.h"
+#include "kernel/AvgPoolLayer.h"
+#include "kernel/CastLayer.h"
+#include "kernel/CompareLayer.h"
+#include "kernel/ConcatLayer.h"
+#include "kernel/ConvolutionLayer.h"
+#include "kernel/DepthwiseConvolutionLayer.h"
+#include "kernel/DivLayer.h"
+#include "kernel/ExpLayer.h"
+#include "kernel/FullyConnectedLayer.h"
+#include "kernel/GatherLayer.h"
+#include "kernel/LogisticLayer.h"
+#include "kernel/MaxLayer.h"
+#include "kernel/MaxPoolLayer.h"
+#include "kernel/MinLayer.h"
+#include "kernel/MulLayer.h"
+#include "kernel/OneHotLayer.h"
+#include "kernel/OperationUtils.h"
+#include "kernel/PackLayer.h"
+#include "kernel/PadLayer.h"
+#include "kernel/PermuteLayer.h"
+#include "kernel/ReduceLayer.h"
+#include "kernel/ReshapeLayer.h"
+#include "kernel/RsqrtLayer.h"
+#include "kernel/ShapeLayer.h"
+#include "kernel/SinLayer.h"
+#include "kernel/SliceLayer.h"
+#include "kernel/SoftMaxLayer.h"
+#include "kernel/StridedSliceLayer.h"
+#include "kernel/SplitLayer.h"
+#include "kernel/SubLayer.h"
+#include "kernel/TanhLayer.h"
+#include "kernel/TransposeLayer.h"
+#include "kernel/UnpackLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+
+KernelGenerator::KernelGenerator(
+ const ir::Operands &operand_ctx, const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<backend::custom::IKernelBuilder> &kernel_builder)
+ : _ctx(operand_ctx), _tensor_builder(tensor_builder), _kernel_builder(kernel_builder),
+ _current_op_seq_layout(ir::Layout::UNKNOWN)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const ir::OpSequence &op_seq)
+{
+ // TODO Move this to IKernelGenerator
+ // (all derivatives have the same implementation for this)
+ assert(!_return_fn_seq);
+ _return_fn_seq = std::make_unique<exec::FunctionSequence>();
+ _current_op_seq_layout = op_seq.getLayout();
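+ // Generate a kernel for each operation in the op sequence and append it to the function sequence.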
+ for (const auto &e : op_seq.operations())
+ {
+ const auto &node = *(e.node);
+ node.accept(*this);
+ _return_fn_seq->append(releaseFunction());
+
+ // NOTE A Permute node references tensors owned by other backends, so skip ref counting for it
+ if (node.opcode() != ir::OpCode::Permute)
+ {
+ for (const auto &ind : node.getInputs() + node.getOutputs())
+ {
+ auto tensor = _tensor_builder->at(ind);
+ if (tensor)
+ {
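+ // Register this use so the tensor's buffer stays alive until every kernel that references it has run.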
+ tensor->increase_ref();
+ }
+ }
+ }
+ }
+}
+
+void KernelGenerator::visit(const ir::operation::Conv2D &node)
+{
+ using ir::operation::Conv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto padding_type = node.param().padding.type;
+ const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
+ ker_width, ker_height);
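+ // calculatePadding resolves the EXPLICIT/SAME/VALID spec into concrete pixel counts. For SAME,
+ // the conventional arithmetic (a sketch, not necessarily the helper's exact code) is
+ //   pad_total = max((ofm - 1) * stride + ker - ifm, 0), split as evenly as possible per side.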
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ker_alloc = _tensor_builder->at(ker_index).get();
+ auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::ConvolutionLayer>();
+
+ fn->configure(ifm_alloc, ker_alloc, bias_alloc, padding_type, padding.left, padding.right,
+ padding.top, padding.bottom, stride.horizontal, stride.vertical, activation,
+ ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
+{
+ using ir::operation::DepthwiseConv2D;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
+ ker_width, ker_height);
+ const auto multiplier = node.param().multiplier;
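+ // Depthwise channel multiplier: depth_out == depth_in * multiplier.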
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+ auto ker_alloc = _tensor_builder->at(ker_index).get();
+ auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::DepthwiseConvolutionLayer>();
+
+ fn->configure(ifm_alloc, ker_alloc, bias_alloc, padding.left, padding.right, padding.top,
+ padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
+ ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::MaxPool2D::Input::INPUT)};
+
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::MaxPoolLayer>();
+
+ fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::AvgPool2D::Input::INPUT)};
+
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
+ const auto padding =
+ ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::AvgPoolLayer>();
+
+ fn->configure(ifm_alloc, padding.left, padding.right, padding.top, padding.bottom,
+ stride.horizontal, stride.vertical, kw, kh, activation, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Concat &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ const auto rank = _ctx.at(ofm_index).shape().rank();
+ const auto axis =
+ ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
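+ // getAxis resolves a possibly-negative axis against the rank and the current op sequence layout.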
+
+ auto output_alloc = _tensor_builder->at(ofm_index).get();
+
+ std::vector<const operand::Tensor *> input_tensors;
+ for (auto &ifm_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_builder->at(ifm_idx).get());
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::ConcatLayer>();
+
+ fn->configure(input_tensors, axis, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::FullyConnected &node)
+{
+ using ir::operation::FullyConnected;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnected::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnected::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnected::Input::BIAS)};
+ const auto activation = node.param().activation;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ auto weight_alloc = _tensor_builder->at(weight_index).get();
+ auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::FullyConnectedLayer>();
+
+ fn->configure(input_alloc, weight_alloc, bias_alloc, activation, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Reshape &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ // The SHAPE input is optional; when absent, the output operand's shape determines the result
+ operand::Tensor *shape_alloc = nullptr;
+
+ if (node.getInputs().size() == 2)
+ {
+ const auto shape_index{node.getInputs().at(ir::operation::Reshape::Input::SHAPE)};
+ shape_alloc = _tensor_builder->at(shape_index).get();
+ }
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::ReshapeLayer>();
+
+ fn->configure(input_alloc, shape_alloc, output_alloc);
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Squeeze &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ // Squeeze only drops size-1 dimensions without moving data, so it can share the Reshape kernel
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::ReshapeLayer>();
+
+ fn->configure(input_alloc, nullptr, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Softmax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
+
+ const auto beta = node.param().beta;
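+ // beta scales the logits before normalization: softmax(x)[i] = exp(beta * x[i]) / sum_j exp(beta * x[j]).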
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::SoftMaxLayer>();
+
+ fn->configure(input_alloc, beta, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Add &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::AddLayer>();
+
+ fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Comparison &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto comparison_type = node.param().comparison_type;
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::CompareLayer>();
+
+ fn->configure(lhs_alloc, rhs_alloc, comparison_type, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Gather &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
+ const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ auto indices_alloc = _tensor_builder->at(indices_index).get();
+
+ const auto backend_layout = output_alloc->layout();
+ UNUSED_RELEASE(backend_layout);
+
+ // NOTE The frontend layout and the backend layout must be the same for this operation.
+ // If they differ, a permutation stage for the output tensor would be needed, which is
+ // inefficient even if it works. In that case it would be better to force these backend
+ // tensors into a single shared layout.
+ // One more thing to consider: this operation depends on the model's layout. For example,
+ // in an NHWC model with output rank == 4, indices rank == 2 and axis == 2, the gather
+ // spans the W and C axes, but W and C are not contiguous in NCHW, so an NCHW backend
+ // cannot handle this case.
+ assert(backend_layout == input_alloc->layout());
+ assert(backend_layout == indices_alloc->layout());
+ const auto &input_shape = _ctx.at(input_index).shape();
+ UNUSED_RELEASE(input_shape);
+ assert(input_shape.rank() < 4 || _current_op_seq_layout == backend_layout);
+
+ const auto axis_raw = node.param().axis;
+ const auto axis_value = (axis_raw < 0 ? (input_shape.rank() + axis_raw) : axis_raw);
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::GatherLayer>();
+
+ fn->configure(input_alloc, indices_alloc, output_alloc, axis_value);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Sub &node)
+{
+ // The same as Add
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::SubLayer>();
+
+ fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Mul &node)
+{
+ // The same as Add
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::MulLayer>();
+
+ fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::OneHot &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto indices_index{node.getInputs().at(ir::operation::OneHot::INDICES)};
+
+ const auto depth = node.param().depth;
+ const auto on_value = node.param().on_value;
+ const auto off_value = node.param().off_value;
+ const auto axis = node.param().axis;
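+ // axis == -1 appends the one-hot dimension as the innermost axis (usual TFLite semantics).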
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto indices_alloc = _tensor_builder->at(indices_index).get();
+
+ assert(indices_alloc->data_type() == OperandType::INT32);
+ assert(axis <= static_cast<int>(indices_alloc->num_dimensions()));
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::OneHotLayer>();
+
+ fn->configure(indices_alloc, output_alloc, depth, on_value, off_value, axis);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Div &node)
+{
+ // The same as Add
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::DivLayer>();
+
+ fn->configure(lhs_alloc, rhs_alloc, activation, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Permute &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ const auto &shape = _ctx.at(output_index).shape();
+ const auto input_backend_ctx = node.param().input_backend_ctx;
+ const auto output_backend_ctx = node.param().output_backend_ctx;
+ const auto data_type = node.getDataType();
+
+ auto output_tensor = output_backend_ctx->tensor_builder->tensorAt(output_index);
+ auto input_tensor = input_backend_ctx->tensor_builder->tensorAt(input_index);
+ assert(output_tensor != nullptr);
+ assert(input_tensor != nullptr);
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::PermuteLayer>();
+
+ // TODO Support NCHW frontend
+ auto out_shape = shape;
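+ // The frontend shape is NHWC; remap (N, H, W, C) -> (N, C, H, W) when the output tensor is NCHW.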
+ if (shape.rank() == 4 && output_tensor->layout() == ir::Layout::NCHW)
+ {
+ out_shape.dim(1) = shape.dim(3);
+ out_shape.dim(2) = shape.dim(1);
+ out_shape.dim(3) = shape.dim(2);
+ }
+
+ const auto permute_type = node.getPermuteType();
+ // Infer the permutation type from the actual tensor layouts and verify it matches the node's attribute
+ const auto inferPermuteType = [&]() {
+ if (input_tensor->layout() == ir::Layout::NHWC && output_tensor->layout() == ir::Layout::NCHW)
+ {
+ return ir::operation::Permute::Type::NHWC_TO_NCHW;
+ }
+ else if (input_tensor->layout() == ir::Layout::NCHW &&
+ output_tensor->layout() == ir::Layout::NHWC)
+ {
+ return ir::operation::Permute::Type::NCHW_TO_NHWC;
+ }
+ else
+ {
+ return ir::operation::Permute::Type::COPY;
+ }
+ }();
+ UNUSED_RELEASE(inferPermuteType);
+ assert(permute_type == inferPermuteType);
+
+ fn->configure(input_tensor, output_tensor, out_shape, permute_type, data_type);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Custom &node)
+{
+ auto get_type_info = [](const ir::Operand &operand) -> custom::TypeInfo {
+ const auto &frontend_shape = operand.shape();
+ custom::Shape shape(frontend_shape.rank());
+ for (auto d = 0; d < frontend_shape.rank(); ++d)
+ {
+ shape.dim(d) = frontend_shape.dim(d);
+ }
+
+ return {shape, operand.typeInfo().type()};
+ };
+
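+ // Collect per-operand type/shape info and raw buffer pointers to hand to the custom kernel builder.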
+ auto fill_op_info = [&](const ir::OperandIndexSequence &opSeq,
+ std::vector<custom::TypeInfo> &types, std::vector<void *> &allocs) {
+ for (auto &idx : opSeq)
+ {
+ const auto &operand = _ctx.at(idx);
+ // TODO make sure using `_current_op_seq_layout` is correct for custom operations
+ types.emplace_back(get_type_info(operand));
+ auto in_alloc = _tensor_builder->at(idx)->buffer();
+ allocs.emplace_back(in_alloc);
+ }
+ };
+
+ backend::custom::CustomKernelConfigParams params{};
+
+ fill_op_info(node.getInputs(), params.input_types, params.input_allocations);
+ fill_op_info(node.getOutputs(), params.output_types, params.output_allocations);
+
+ params.userdata = node.userdata().data;
+ params.userdata_size = node.userdata().size;
+
+ auto fn = _kernel_builder->buildKernel(node.id(), std::move(params));
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Exp &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::ExpLayer>();
+
+ fn->configure(input_alloc, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Logistic &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::LogisticLayer>();
+
+ fn->configure(input_alloc, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Tanh &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::TanhLayer>();
+
+ fn->configure(input_alloc, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Pack &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ const auto rank = node.param().rank;
+ const auto axis =
+ ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+
+ assert(-rank <= axis && axis < rank);
+
+ auto output_alloc = _tensor_builder->at(ofm_index).get();
+
+ std::vector<const operand::Tensor *> input_tensors;
+ for (auto &ifm_idx : node.getInputs())
+ input_tensors.emplace_back(_tensor_builder->at(ifm_idx).get());
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::PackLayer>();
+
+ fn->configure(input_tensors, axis, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Unpack &node)
+{
+ const auto input_index{node.getInputs().at(0)};
+
+ const auto rank = node.param().rank;
+ const auto axis =
+ ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+
+ assert(-rank <= axis && axis < rank);
+
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ std::vector<operand::Tensor *> output_tensors;
+ for (auto &output_idx : node.getOutputs())
+ output_tensors.emplace_back(_tensor_builder->at(output_idx).get());
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::UnpackLayer>();
+
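+ // Normalize a negative axis into the range [0, rank).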
+ uint32_t axis_resolved = (axis < 0 ? axis + rank : axis);
+
+ fn->configure(input_alloc, axis_resolved, node.param().num, output_tensors);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Pad &node)
+{
+ const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
+ const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
+ const auto output_index{node.getOutputs().at(0)};
+ assert(_ctx.at(pad_index).data());
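+ // The PAD input must be constant; it is read as an int32 table of shape [input_rank, 2]
+ // holding (before, after) padding pairs, so dim(0) equals the input rank.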
+
+ auto input = _tensor_builder->at(input_index).get();
+ auto output = _tensor_builder->at(output_index).get();
+ auto pad_rank = _ctx.at(pad_index).shape().dim(0);
+ auto pad_base = reinterpret_cast<const int32_t *>(_ctx.at(pad_index).data()->base());
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::PadLayer>();
+
+ fn->configure(input, output, pad_base, pad_rank);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Max &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::MaxLayer>();
+
+ fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Min &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::MinLayer>();
+
+ fn->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Cast &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::CastLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Transpose &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ auto rank = node.param().rank;
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::TransposeLayer>();
+
+ fn->configure(input_alloc, output_alloc, node.param().perm, rank);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReduceSum &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<kernel::ReduceLayer>();
+
+ fn->configure(input_alloc, output_alloc, kernel::ReduceType::kSum, node.param().axes,
+ node.param().keep_dims);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReduceMax &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<kernel::ReduceLayer>();
+
+ fn->configure(input_alloc, output_alloc, kernel::ReduceType::kMax, node.param().axes,
+ node.param().keep_dims);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::ReduceMin &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = std::make_unique<kernel::ReduceLayer>();
+
+ fn->configure(input_alloc, output_alloc, kernel::ReduceType::kMin, node.param().axes,
+ node.param().keep_dims);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Slice &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
+ const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
+ const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ auto begins_alloc = _tensor_builder->at(begins_index).get();
+ auto sizes_alloc = _tensor_builder->at(sizes_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::SliceLayer>();
+
+ fn->configure(input_alloc, begins_alloc, sizes_alloc, output_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::StridedSlice &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
+ const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
+ const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
+ const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ auto starts_alloc = _tensor_builder->at(starts_index).get();
+ auto ends_alloc = _tensor_builder->at(ends_index).get();
+ auto strides_alloc = _tensor_builder->at(strides_index).get();
+
+ auto begin_mask = node.param().begin_mask;
+ auto end_mask = node.param().end_mask;
+ auto shrink_axis_mask = node.param().shrink_axis_mask;
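+ // Masks follow TFLite StridedSlice semantics: bit i of begin_mask/end_mask means
+ // "ignore starts[i]/ends[i]"; bit i of shrink_axis_mask collapses dimension i.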
+ auto rank = node.param().rank;
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::StridedSliceLayer>();
+
+ fn->configure(input_alloc, starts_alloc, ends_alloc, strides_alloc, output_alloc, begin_mask,
+ end_mask, shrink_axis_mask, rank);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Split &node)
+{
+ const auto num_splits = node.param().num_splits;
+ assert(num_splits == static_cast<int>(node.getOutputs().size()));
+
+ const auto rank = node.param().rank;
+ const auto axis =
+ ::onert::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_op_seq_layout);
+ auto axis_resolved = axis < 0 ? axis + rank : axis;
+ assert(0 <= axis_resolved && axis_resolved < rank);
+
+ const auto input_idx{node.getInputs().at(ir::operation::Split::Input::INPUT)};
+ auto in_tensor = _tensor_builder->at(input_idx).get();
+
+ std::vector<operand::Tensor *> out_tensors;
+ for (auto &output_idx : node.getOutputs())
+ out_tensors.emplace_back(_tensor_builder->at(output_idx).get());
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::SplitLayer>();
+
+ fn->configure(in_tensor, num_splits, axis_resolved, out_tensors);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Abs &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::AbsLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Sin &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Sin::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::SinLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::RSQRT &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::RsqrtLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+void KernelGenerator::visit(const ir::operation::Shape &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(ir::operation::Shape::Input::INPUT)};
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = std::make_unique<::onert::backend::cpu::kernel::ShapeLayer>();
+
+ fn->configure(ifm_alloc, ofm_alloc);
+
+ _return_fn = std::move(fn);
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace onert