Diffstat (limited to 'runtimes/neurun/backend/cpu/KernelGenerator.cc')
-rw-r--r--  runtimes/neurun/backend/cpu/KernelGenerator.cc  455
1 file changed, 455 insertions(+), 0 deletions(-)
diff --git a/runtimes/neurun/backend/cpu/KernelGenerator.cc b/runtimes/neurun/backend/cpu/KernelGenerator.cc
new file mode 100644
index 000000000..61de75493
--- /dev/null
+++ b/runtimes/neurun/backend/cpu/KernelGenerator.cc
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include <stdexcept>
+
+#include "cpp14/memory.h"
+#include "util/Padding.h"
+#include "kernel/OperationUtils.h"
+#include "kernel/ConvolutionLayer.h"
+#include "kernel/AvgPoolLayer.h"
+#include "kernel/MaxPoolLayer.h"
+#include "kernel/ConcatLayer.h"
+#include "kernel/FullyConnectedLayer.h"
+#include "kernel/ReshapeLayer.h"
+#include "kernel/SoftMaxLayer.h"
+#include "kernel/PermuteLayer.h"
+#include "kernel/DepthwiseConvolutionLayer.h"
+#include "kernel/AddLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+
+#include "util/logging.h"
+
+#include "util/Utils.h"
+
+namespace neurun
+{
+namespace backend
+{
+namespace cpu
+{
+
+KernelGenerator::KernelGenerator(const neurun::model::Operands &operand_ctx,
+ const std::shared_ptr<TensorBuilder> &tensor_builder,
+ const std::shared_ptr<custom::KernelRegistry> &kernel_registry)
+ : _ctx(operand_ctx), _tensor_builder(tensor_builder), _kernel_registry(kernel_registry),
+ _current_subg_layout(model::Layout::UNKNOWN)
+{
+ // DO NOTHING
+}
+
+void KernelGenerator::visit(const model::Subgraph &subgraph)
+{
+ _current_subg_layout = subgraph.getLayout();
+ for (const auto &e : subgraph.operations())
+ {
+ const auto &node = *(e.node);
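+    // Bracket each node with preVisit/postVisit so the tensor builder can do
+    // per-operation setup and teardown around kernel generation.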
+ _tensor_builder->preVisit(node);
+ node.accept(*this);
+ _tensor_builder->postVisit(node);
+ }
+}
+
+void KernelGenerator::visit(const model::operation::Conv2DNode &node)
+{
+ using model::operation::Conv2DNode;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(Conv2DNode::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(Conv2DNode::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(Conv2DNode::Input::BIAS)};
+
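+  // Query the feature-map shapes in the current subgraph layout and compute the
+  // explicit padding values from the padding scheme, stride and kernel size.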
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+ // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
+ stride, ker_width, ker_height);
+ const auto activation = node.param().activation;
+
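+  // Convert operand shapes to the backend kernel's Shape type; kernel and bias
+  // use Layout::UNKNOWN, presumably because they are not layout-dependent.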
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ifm_index), _current_subg_layout);
+ const auto ker_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ker_index), model::Layout::UNKNOWN);
+ const auto bias_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(bias_index), model::Layout::UNKNOWN);
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index);
+ auto ifm_alloc = _tensor_builder->at(ifm_index);
+ auto ker_alloc = _tensor_builder->at(ker_index);
+ auto bias_alloc = _tensor_builder->at(bias_index);
+
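+  // Build the CPU convolution kernel: wire up the raw tensor buffers and
+  // parameters, then queue the kernel on the execution builder.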
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ConvolutionLayer>();
+
+ fn->configure(ifm_alloc->buffer(), ifm_backend_shape, ker_alloc->buffer(), ker_backend_shape,
+ bias_alloc->buffer(), bias_backend_shape, padding.left, padding.right, padding.top,
+ padding.bottom, stride.horizontal, stride.vertical, activation, ofm_alloc->buffer(),
+ ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::DepthwiseConv2DNode &node)
+{
+ using model::operation::DepthwiseConv2DNode;
+
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(DepthwiseConv2DNode::Input::INPUT)};
+ const auto ker_index{node.getInputs().at(DepthwiseConv2DNode::Input::KERNEL)};
+ const auto bias_index{node.getInputs().at(DepthwiseConv2DNode::Input::BIAS)};
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+ // Kernel format is [1, kernel_height, kernel_width, depth_out].
+ const auto &ker_shape = _ctx.at(ker_index).shape();
+ const auto ker_height = ker_shape.dim(1);
+ const auto ker_width = ker_shape.dim(2);
+ const auto padding = neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape,
+ stride, ker_width, ker_height);
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ifm_index), _current_subg_layout);
+ const auto ker_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ker_index), model::Layout::UNKNOWN);
+ const auto bias_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(bias_index), model::Layout::UNKNOWN);
+
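+  // The depth multiplier is the number of output channels produced per input channel.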
+ const auto multiplier = node.param().multiplier;
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index);
+ auto ifm_alloc = _tensor_builder->at(ifm_index);
+ auto ker_alloc = _tensor_builder->at(ker_index);
+ auto bias_alloc = _tensor_builder->at(bias_index);
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::DepthwiseConvolutionLayer>();
+
+ fn->configure(ifm_alloc->buffer(), ifm_backend_shape, ker_alloc->buffer(), ker_backend_shape,
+ bias_alloc->buffer(), bias_backend_shape, padding.left, padding.right, padding.top,
+ padding.bottom, stride.horizontal, stride.vertical, multiplier, activation,
+ ofm_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::MaxPool2DNode &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(model::operation::MaxPool2DNode::Input::INPUT)};
+
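+  // kh/kw: height and width of the pooling window.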
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+ const auto padding =
+ neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ifm_index), _current_subg_layout);
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::MaxPoolLayer>();
+
+ fn->configure(ifm_alloc->buffer(), ifm_backend_shape, padding.left, padding.right, padding.top,
+ padding.bottom, stride.horizontal, stride.vertical, kw, kh, activation,
+ ofm_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::AvgPool2DNode &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto ifm_index{node.getInputs().at(model::operation::AvgPool2DNode::Input::INPUT)};
+
+ const auto kh = node.param().kh;
+ const auto kw = node.param().kw;
+ const auto stride = node.param().stride;
+ const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_subg_layout);
+ const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_subg_layout);
+ const auto padding =
+ neurun::util::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride, kw, kh);
+ const auto activation = node.param().activation;
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ifm_index), _current_subg_layout);
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::AvgPoolLayer>();
+
+ fn->configure(ifm_alloc->buffer(), ifm_backend_shape, padding.left, padding.right, padding.top,
+ padding.bottom, stride.horizontal, stride.vertical, kw, kh, activation,
+ ofm_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::ConcatNode &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+
+ const auto rank = _ctx.at(ofm_index).shape().rank();
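+  // Convert the frontend concat axis to the backend axis for this rank and layout.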
+ const auto axis =
+ ::neurun::backend::cpu::kernel::getAxis(rank, node.param().axis, _current_subg_layout);
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ std::vector<::neurun::backend::cpu::kernel::Shape> ifm_backend_shapes;
+ for (auto &in_idx : node.getInputs())
+ ifm_backend_shapes.emplace_back(
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(in_idx), _current_subg_layout));
+
+ auto output_alloc = _tensor_builder->at(ofm_index).get();
+
+ std::vector<const uint8_t *> input_buffers;
+ for (auto &ifm_idx : node.getInputs())
+ input_buffers.emplace_back(_tensor_builder->at(ifm_idx).get()->buffer());
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ConcatLayer>();
+
+ fn->configure(input_buffers, ifm_backend_shapes, axis, output_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::FullyConnectedNode &node)
+{
+ using model::operation::FullyConnectedNode;
+
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(FullyConnectedNode::Input::INPUT)};
+ const auto weight_index{node.getInputs().at(FullyConnectedNode::Input::WEIGHT)};
+ const auto bias_index{node.getInputs().at(FullyConnectedNode::Input::BIAS)};
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(output_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(input_index), _current_subg_layout);
+ const auto weight_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(weight_index), model::Layout::UNKNOWN);
+ const auto bias_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(bias_index), model::Layout::UNKNOWN);
+
+ const auto activation = node.param().activation;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+ auto weight_alloc = _tensor_builder->at(weight_index).get();
+ auto bias_alloc = _tensor_builder->at(bias_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::FullyConnectedLayer>();
+
+ fn->configure(input_alloc->buffer(), ifm_backend_shape, weight_alloc->buffer(),
+ weight_backend_shape, bias_alloc->buffer(), bias_backend_shape, activation,
+ output_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::MulNode &) { throw std::runtime_error("NYI"); }
+
+void KernelGenerator::visit(const model::operation::ReshapeNode &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(model::operation::ReshapeNode::Input::INPUT)};
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(output_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(input_index), _current_subg_layout);
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::ReshapeLayer>();
+
+ fn->configure(input_alloc->buffer(), ifm_backend_shape, output_alloc->buffer(),
+ ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::SoftmaxNode &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(model::operation::SoftmaxNode::Input::INPUT)};
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(output_index), _current_subg_layout);
+ const auto ifm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(input_index), _current_subg_layout);
+
+ const auto beta = node.param().beta;
+
+ auto output_alloc = _tensor_builder->at(output_index).get();
+ auto input_alloc = _tensor_builder->at(input_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::SoftMaxLayer>();
+
+ fn->configure(input_alloc->buffer(), ifm_backend_shape, beta, output_alloc->buffer(),
+ ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::AddNode &node)
+{
+ const auto ofm_index{node.getOutputs().at(0)};
+ const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)};
+ const auto rhs_index{node.getInputs().at(model::operation::AddNode::Input::RHS)};
+
+ const auto ofm_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(ofm_index), _current_subg_layout);
+ const auto lhs_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(lhs_index), _current_subg_layout);
+ const auto rhs_backend_shape =
+ ::neurun::backend::cpu::kernel::getShape(_ctx.at(rhs_index), _current_subg_layout);
+
+ const auto activation = node.param().activation;
+
+ auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+ auto lhs_alloc = _tensor_builder->at(lhs_index).get();
+ auto rhs_alloc = _tensor_builder->at(rhs_index).get();
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::AddLayer>();
+
+ fn->configure(lhs_alloc->buffer(), lhs_backend_shape, rhs_alloc->buffer(), rhs_backend_shape,
+ activation, ofm_alloc->buffer(), ofm_backend_shape);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::PermuteNode &node)
+{
+ const auto output_index{node.getOutputs().at(0)};
+ const auto input_index{node.getInputs().at(0)};
+
+ const auto &shape = _ctx.at(output_index).shape();
+ const auto input_backend_ctx = node.param().input_backend_ctx;
+ const auto output_backend_ctx = node.param().output_backend_ctx;
+ const auto data_type = node.getDataType();
+
+ output_backend_ctx->tensor_builder->preVisit(node);
+
+ auto output_object = output_backend_ctx->tensor_builder->wrapTensor(output_index);
+ auto input_object = input_backend_ctx->tensor_builder->wrapTensor(input_index);
+
+ auto fn = nnfw::cpp14::make_unique<::neurun::backend::cpu::kernel::PermuteLayer>();
+
+ // TODO Support NCHW frontend
+ auto out_shape = shape;
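+  // If the output is 4-D NCHW, reorder the NHWC shape (N, H, W, C) into (N, C, H, W).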
+ if (shape.rank() == 4 && output_object->ptr()->layout() == model::Layout::NCHW)
+ {
+ out_shape.dim(1) = shape.dim(3);
+ out_shape.dim(2) = shape.dim(1);
+ out_shape.dim(3) = shape.dim(2);
+ }
+
+ const auto permute_type = node.getPermuteType();
+  // Infer the permute type from the input/output layouts and assert (in debug
+  // builds) that it matches the node's declared permute type.
+ const auto inferPermuteType = [&]() {
+ if (input_object->ptr()->layout() == model::Layout::NHWC &&
+ output_object->ptr()->layout() == model::Layout::NCHW)
+ {
+ return model::operation::PermuteNode::Type::NHWC_TO_NCHW;
+ }
+ else if (input_object->ptr()->layout() == model::Layout::NCHW &&
+ output_object->ptr()->layout() == model::Layout::NHWC)
+ {
+ return model::operation::PermuteNode::Type::NCHW_TO_NHWC;
+ }
+ else
+ {
+ return model::operation::PermuteNode::Type::COPY;
+ }
+ }();
+ UNUSED_RELEASE(inferPermuteType);
+ assert(permute_type == inferPermuteType);
+
+ fn->configure(input_object, output_object, out_shape, permute_type, data_type);
+
+ input_backend_ctx->tensor_builder->postVisit(node);
+
+ _execution_builder->append(std::move(fn));
+}
+
+void KernelGenerator::visit(const model::operation::CustomNode &node)
+{
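+  // Collect type info (shape and element type) and raw buffers for all inputs
+  // and outputs, pack them with the user data into the kernel config params,
+  // then build the custom kernel from the registry and queue it for execution.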
+ auto get_type_info = [this](const model::Operand &operand) -> custom::TypeInfo {
+ auto backendShape = ::neurun::backend::cpu::kernel::getShape(operand, _current_subg_layout);
+
+ custom::Shape shape(backendShape.dimensions.size());
+ for (size_t d = 0; d < backendShape.dimensions.size(); ++d)
+ {
+ shape.dim(d) = backendShape.dimensions[d];
+ }
+
+ return {shape, backendShape.type};
+ };
+
+ auto fill_op_info = [&](const model::OperandIndexSequence &opSeq,
+ std::vector<custom::TypeInfo> &types, std::vector<void *> &allocs) {
+ for (auto &idx : opSeq)
+ {
+ const auto &operand = _ctx.at(idx);
+ // TODO make sure using `_current_subg_layout` is correct for custom operations
+ types.emplace_back(get_type_info(operand));
+ auto in_alloc = _tensor_builder->at(idx)->buffer();
+ allocs.emplace_back(in_alloc);
+ }
+ };
+
+ custom::Kernel::CustomKernelConfigParams params{};
+
+ fill_op_info(node.getInputs(), params.input_types, params.input_allocations);
+ fill_op_info(node.getOutputs(), params.output_types, params.output_allocations);
+
+ params.userdata = node.userdata().data;
+ params.userdata_size = node.userdata().size;
+
+ auto fn = _kernel_registry->buildKernelForOp(node.id());
+
+ fn->configure(std::move(params));
+
+ _execution_builder->append(std::move(fn));
+}
+
+} // namespace cpu
+} // namespace backend
+} // namespace neurun