Diffstat (limited to 'runtimes/neurun/backend/cpu/KernelGenerator.cc')
-rw-r--r--  runtimes/neurun/backend/cpu/KernelGenerator.cc  455
1 file changed, 455 insertions(+), 0 deletions(-)
diff --git a/runtimes/neurun/backend/cpu/KernelGenerator.cc b/runtimes/neurun/backend/cpu/KernelGenerator.cc
new file mode 100644
index 000000000..61de75493
--- /dev/null
+++ b/runtimes/neurun/backend/cpu/KernelGenerator.cc
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include <stdexcept>
+
+#include "cpp14/memory.h"
+#include "util/Padding.h"
+#include "kernel/OperationUtils.h"
+#include "kernel/ConvolutionLayer.h"
+#include "kernel/AvgPoolLayer.h"
+#include "kernel/MaxPoolLayer.h"
+#include "kernel/ConcatLayer.h"
+#include "kernel/FullyConnectedLayer.h"
+#include "kernel/ReshapeLayer.h"
+#include "kernel/SoftMaxLayer.h"
+#include "kernel/PermuteLayer.h"
+#include "kernel/DepthwiseConvolutionLayer.h"
+#include "kernel/AddLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+
+#include "util/logging.h"
+
+#include "util/Utils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+
+KernelGenerator::KernelGenerator(const neurun::model::Operands &operand_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<custom::KernelRegistry> &kernel_registry)
+ : _ctx(operand_ctx), _tensor_builder(tensor_builder), _kernel_registry(kernel_registry),
+ _current_subg_layout(model::Layout::UNKNOWN)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const model::Subgraph &subgraph)
+{
+ _current_subg_layout = subgraph.getLayout();
+ for (const auto &e : subgraph.operations())
+ {
+ const auto &node = *(e.node);
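+    // Bracket each node with preVisit/postVisit so the tensor builder can do
+    // per-operation setup and teardown around kernel generation.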
+ _tensor_builder->preVisit(node);
+ node.accept(*this);
+ _tensor_builder->postVisit(node);
+ }
+}
+
+void KernelGenerator::visit(const model::operation::Conv2DNode &node)
+{
+ using model::operation::Conv2DNode;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2DNode::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2DNode::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2DNode::Input::BIAS)};
+
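+  // Query the feature-map shapes in the current subgraph layout and compute the
+  // explicit padding values from the padding scheme, stride and kernel size.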
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
+ stride, ker_width, ker_height);
+ const auto activation = node.param().activation;
+
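+  // Convert operand shapes to the backend kernel's Shape type; kernel and bias
+  // use Layout::UNKNOWN, presumably because they are not layout-dependent.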
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ifm_index), _current_subg_layout);
+ const auto ker_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ker_index), model::Layout::UNKNOWN);
+ const auto bias_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(bias_index), model::Layout::UNKNOWN);
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index);
+ auto ifm_alloc = _tensor_builder->at(ifm_index);
+ auto ker_alloc = _tensor_builder->at(ker_index);
+ auto bias_alloc = _tensor_builder->at(bias_index);
+
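+  // Build the CPU convolution kernel: wire up the raw tensor buffers and
+  // parameters, then queue the kernel on the execution builder.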
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ConvolutionLayer>();
+
+ fn->configure(ifm_alloc->buffer(), ifm_backend_shape, ker_alloc->buffer(), ker_backend_shape,
+ bias_alloc->buffer(), bias_backend_shape, padding.left, padding.right, padding.top,
+ padding.bottom, stride.horizontal, stride.vertical, activation, ofm_alloc->buffer(),
+ ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::DepthwiseConv2DNode &node)
+{
+ using model::operation::DepthwiseConv2DNode;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(DepthwiseConv2DNode::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(DepthwiseConv2DNode::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(DepthwiseConv2DNode::Input::BIAS)};
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
+ stride, ker_width, ker_height);
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ifm_index), _current_subg_layout);
+ const auto ker_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ker_index), model::Layout::UNKNOWN);
+ const auto bias_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(bias_index), model::Layout::UNKNOWN);
+
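+  // The depth multiplier is the number of output channels produced per input channel.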
+ const auto multiplier = node.param().multiplier;
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index);
+ auto ifm_alloc = _tensor_builder->at(ifm_index);
+ auto ker_alloc = _tensor_builder->at(ker_index);
+ auto bias_alloc = _tensor_builder->at(bias_index);
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::DepthwiseConvolutionLayer>();
+
+ fn->configure(ifm_alloc->buffer(), ifm_backend_shape, ker_alloc->buffer(), ker_backend_shape,
+ bias_alloc->buffer(), bias_backend_shape, padding.left, padding.right, padding.top,
+ padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
+ ofm_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::MaxPool2DNode &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(model::operation::MaxPool2DNode::Input::INPUT)};
+
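+  // kh/kw: height and width of the pooling window.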
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+ const auto padding =
+ neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ifm_index), _current_subg_layout);
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::MaxPoolLayer>();
+
+ fn->configure(ifm_alloc->buffer(), ifm_backend_shape, padding.left, padding.right, padding.top,
+ padding.bottom, stride.horizontal, stride.vertical, kw, kh, activation,
+ ofm_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::AvgPool2DNode &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(model::operation::AvgPool2DNode::Input::INPUT)};
+
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+ const auto padding =
+ neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ifm_index), _current_subg_layout);
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::AvgPoolLayer>();
+
+ fn->configure(ifm_alloc->buffer(), ifm_backend_shape, padding.left, padding.right, padding.top,
+ padding.bottom, stride.horizontal, stride.vertical, kw, kh, activation,
+ ofm_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::ConcatNode &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ const auto rank = _ctx.at(ofm_index).shape().rank();
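+  // Convert the frontend concat axis to the backend axis for this rank and layout.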
+ const auto axis =
+ ::neurun::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_subg_layout);
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ std::vector<::neurun::backend::cpu::kernel::Shape> ifm_backend_shapes;
+ for (auto &in_idx : node.getInputs())
+ ifm_backend_shapes.emplace_back(
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(in_idx), _current_subg_layout));
+
+ auto output_alloc = _tensor_builder->at(ofm_index).get();
+
+ std::vector<const uint8_t *> input_buffers;
+ for (auto &ifm_idx : node.getInputs())
+ input_buffers.emplace_back(_tensor_builder->at(ifm_idx).get()->buffer());
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ConcatLayer>();
+
+ fn->configure(input_buffers, ifm_backend_shapes, axis, output_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::FullyConnectedNode &node)
+{
+ using model::operation::FullyConnectedNode;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnectedNode::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnectedNode::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnectedNode::Input::BIAS)};
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(output_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(input_index), _current_subg_layout);
+ const auto weight_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(weight_index), model::Layout::UNKNOWN);
+ const auto bias_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(bias_index), model::Layout::UNKNOWN);
+
+ const auto activation = node.param().activation;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ auto weight_alloc = _tensor_builder->at(weight_index).get();
+ auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::FullyConnectedLayer>();
+
+ fn->configure(input_alloc->buffer(), ifm_backend_shape, weight_alloc->buffer(),
+ weight_backend_shape, bias_alloc->buffer(), bias_backend_shape, activation,
+ output_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::MulNode &) { throw std::runtime_error("NYI"); }
+
+void KernelGenerator::visit(const model::operation::ReshapeNode &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(model::operation::ReshapeNode::Input::INPUT)};
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(output_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(input_index), _current_subg_layout);
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ReshapeLayer>();
+
+ fn->configure(input_alloc->buffer(), ifm_backend_shape, output_alloc->buffer(),
+ ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::SoftmaxNode &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(model::operation::SoftmaxNode::Input::INPUT)};
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(output_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(input_index), _current_subg_layout);
+
+ const auto beta = node.param().beta;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::SoftMaxLayer>();
+
+ fn->configure(input_alloc->buffer(), ifm_backend_shape, beta, output_alloc->buffer(),
+ ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::AddNode &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)};
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ const auto lhs_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(lhs_index), _current_subg_layout);
+ const auto rhs_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(rhs_index), _current_subg_layout);
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::AddLayer>();
+
+ fn->configure(lhs_alloc->buffer(), lhs_backend_shape, rhs_alloc->buffer(), rhs_backend_shape,
+ activation, ofm_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::PermuteNode &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ const auto &shape = _ctx.at(output_index).shape();
+ const auto input_backend_ctx = node.param().input_backend_ctx;
+ const auto output_backend_ctx = node.param().output_backend_ctx;
+ const auto data_type = node.getDataType();
+
+ output_backend_ctx->tensor_builder->preVisit(node);
+
+ auto output_object = output_backend_ctx->tensor_builder->wrapTensor(output_index);
+ auto input_object = input_backend_ctx->tensor_builder->wrapTensor(input_index);
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::PermuteLayer>();
+
+ // TODO Support NCHW frontend
+ auto out_shape = shape;
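+  // If the output is 4-D NCHW, reorder the NHWC shape (N, H, W, C) into (N, C, H, W).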
+ if (shape.rank() == 4 && output_object->ptr()->layout() == model::Layout::NCHW)
+ {
+ out_shape.dim(1) = shape.dim(3);
+ out_shape.dim(2) = shape.dim(1);
+ out_shape.dim(3) = shape.dim(2);
+ }
+
+ const auto permute_type = node.getPermuteType();
+  // Infer the permute type from the input/output layouts and assert (in debug
+  // builds) that it matches the node's declared permute type.
+ const auto inferPermuteType = [&]() {
+ if (input_object->ptr()->layout() == model::Layout::NHWC &&
+ output_object->ptr()->layout() == model::Layout::NCHW)
+ {
+ return model::operation::PermuteNode::Type::NHWC_TO_NCHW;
+ }
+ else if (input_object->ptr()->layout() == model::Layout::NCHW &&
+ output_object->ptr()->layout() == model::Layout::NHWC)
+ {
+ return model::operation::PermuteNode::Type::NCHW_TO_NHWC;
+ }
+ else
+ {
+ return model::operation::PermuteNode::Type::COPY;
+ }
+ }();
+ UNUSED_RELEASE(inferPermuteType);
+ assert(permute_type == inferPermuteType);
+
+ fn->configure(input_object, output_object, out_shape, permute_type, data_type);
+
+ input_backend_ctx->tensor_builder->postVisit(node);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::CustomNode &node)
+{
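+  // Collect type info (shape and element type) and raw buffers for all inputs
+  // and outputs, pack them with the user data into the kernel config params,
+  // then build the custom kernel from the registry and queue it for execution.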
+ auto get_type_info = [this](const model::Operand &operand) -> custom::TypeInfo {
+ auto backendShape = ::neurun::backend::cpu::kernel::getShape(operand, _current_subg_layout);
+
+ custom::Shape shape(backendShape.dimensions.size());
+ for (size_t d = 0; d < backendShape.dimensions.size(); ++d)
+ {
+ shape.dim(d) = backendShape.dimensions[d];
+ }
+
+ return {shape, backendShape.type};
+ };
+
+ auto fill_op_info = [&](const model::OperandIndexSequence &opSeq,
+ std::vector<custom::TypeInfo> &types, std::vector<void *> &allocs) {
+ for (auto &idx : opSeq)
+ {
+ const auto &operand = _ctx.at(idx);
+ // TODO make sure using `_current_subg_layout` is correct for custom operations
+ types.emplace_back(get_type_info(operand));
+ auto in_alloc = _tensor_builder->at(idx)->buffer();
+ allocs.emplace_back(in_alloc);
+ }
+ };
+
+ custom::Kernel::CustomKernelConfigParams params{};
+
+ fill_op_info(node.getInputs(), params.input_types, params.input_allocations);
+ fill_op_info(node.getOutputs(), params.output_types, params.output_allocations);
+
+ params.userdata = node.userdata().data;
+ params.userdata_size = node.userdata().size;
+
+ auto fn = _kernel_registry->buildKernelForOp(node.id());
+
+ fn->configure(std::move(params));
+
+ _execution_builder->append(std::move(fn));
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace neurun